From 82cc0b60b71d61b0eb0f63c846ea2c83f53a63f7 Mon Sep 17 00:00:00 2001 From: sillysagiri Date: Fri, 16 May 2025 11:20:11 +0700 Subject: [PATCH] using online k-means to quantize image --- .gitignore | 3 +- CMakeLists.txt | 1 - readme.txt | 3 +- src/header.hpp | 62 +++++---- src/kmean.cpp | 341 +++++++++++++++++++++++++++++++++++++++++++++++++ src/main.cpp | 321 +++++++++++++++++++++++++--------------------- 6 files changed, 549 insertions(+), 182 deletions(-) create mode 100644 src/kmean.cpp diff --git a/.gitignore b/.gitignore index d73e005..3ded315 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .cache -build \ No newline at end of file +build +.continue \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 85931fc..a5968df 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,6 @@ project(${PROJECT_NAME} VERSION ${PROJECT_VERSION}) list(APPEND CMAKE_MODULE_PATH "/opt/cmake") -find_package(LIQ) find_package(STB) include(FetchContent) diff --git a/readme.txt b/readme.txt index ce21890..cc566f4 100644 --- a/readme.txt +++ b/readme.txt @@ -46,7 +46,7 @@ binary format: TODO: [x] bitpacking for indexed format - [ ] bitpacking for alpha indexed format + [x] bitpacking for alpha indexed format [ ] convert image using predefined palette [ ] output preview image [ ] improve error handling @@ -55,7 +55,6 @@ TODO: dependencies: argparse (https://github.com/p-ranav/argparse) stb_image (https://github.com/nothings/stb) - libimagequant (https://github.com/ImageOptim/libimagequant) murmurhash.c (https://github.com/jwerle/murmurhash.c) Timer.h from cherno (https://gist.github.com/TheCherno/b2c71c9291a4a1a29c889e76173c8d14) diff --git a/src/header.hpp b/src/header.hpp index 9da5c26..7fb4b94 100644 --- a/src/header.hpp +++ b/src/header.hpp @@ -41,39 +41,32 @@ enum Format Format_INDEXED_4, Format_INDEXED_16, Format_INDEXED_256, - Format_INDEXED_32A3, - Format_INDEXED_8A5, + Format_INDEXED_32A8, + Format_INDEXED_8A32, Format_PALETTE_16, }; -struct Image { - int width, height, comp; - stbi_uc *data; - - constexpr Image() : width(0), height(0), comp(0), data(nullptr) {} - Image(const std::filesystem::path &path); - ~Image(); - - void Load(const std::filesystem::path &path); -}; - -struct IndexedImage -{ - int width, height; - uint8_t *data; - - constexpr IndexedImage() : width(0), height(0), data(nullptr) {} - IndexedImage(int width, int height); - ~IndexedImage(); - - void Load(int width, int height); -}; - struct Color { - uint8_t a; - uint8_t r; - uint8_t g; - uint8_t b; + double r = 0; + double g = 0; + double b = 0; + double a = 255; + + inline uint16_t toRGB16(uint8_t alpha_threshold = 128) const { + const uint16_t alpha_bit = (a >= alpha_threshold) ? 0b1000000000000000 : 0; + return alpha_bit + | (((uint8_t)r >> 3) & 0b00011111) + | (((uint8_t)g >> 3) & 0b00011111) << 5 + | (((uint8_t)b >> 3) & 0b00011111) << 10; + } +}; + +struct Image { + int width = 0, height = 0; + std::vector data; + + ~Image() {} + void Load(const std::filesystem::path &path); }; struct Options { @@ -85,6 +78,11 @@ struct Options { void Verify(); void Convert(); -uint16_t RGB16(uint8_t a, uint8_t r, uint8_t g, uint8_t b); -void QuantizeImage(const int numColor, std::vector &images, std::vector &palette, std::vector &indexes); -void BitPacking(const uint8_t bpp, IndexedImage &img, uint32_t &length); \ No newline at end of file +std::vector QuantizePalette(const int numColor, const Image &image); +std::vector RemapImage(const Image &image, const std::vector &palette, const int &format); + +std::vector inc_online_kmeans ( const Image *img, const int num_colors, + const double lr_exp = 0.5, const double sample_rate = 0.5); + +/* Max. L_2^2 distance in 24-bit RGB space = 3 * 255 * 255 */ +#define MAX_RGB_DIST 195075 \ No newline at end of file diff --git a/src/kmean.cpp b/src/kmean.cpp new file mode 100644 index 0000000..5f4ac97 --- /dev/null +++ b/src/kmean.cpp @@ -0,0 +1,341 @@ +/* +https://github.com/AmberAbernathy/Color_Quantization/blob/main/test_km_algs.cpp + +Online K-Means (MacQueen, 1967) +Incremental Online K-Means (Abernathy & Celebi, 2022) + +Authors: Amber Abernathy & M. Emre Celebi + +Contact email: ecelebi@uca.edu + +If you find this program useful, please cite: +A. D. Abernathy and M. E. Celebi, +The Incremental Online K-Means Clustering Algorithm +and Its Application to Color Quantization, +Expert Systems with Applications, +in press, https://doi.org/10.1016/j.eswa.2022.117927, 2022. +*/ + +#include +#include +#include "header.hpp" + +typedef unsigned char uchar; +typedef unsigned int uint; +typedef unsigned long ulong; + +struct RGB_Cluster +{ + int size; + Color center; +}; + +/* Max. # colors that can be requested */ +#define MAX_NUM_COLORS 256 + +/* + Powers of two for 0, 1, ..., 16. Note that 2^16 must equal MAX_NUM_COLORS. + If you want to quantize to more than MAX_NUM_COLORS colors, extend the POW2 + array accordingly. + */ +static inline int POW2[] = { 1, 2, 4, 8, 16, 32, 64, 128, 256 }; + +/* + Function to generate two quasirandom numbers from + a 2D Sobol sequence. Adapted from Numerical Recipies + in C. Upon return, X and Y fall in [0,1). + */ + +#define MAX_BIT 30 + +void +sob_seq ( double *x, double *y ) +{ + int j, k, l; + ulong i, im, ipp; + static double fac; + static int init = 0; + static ulong ix1, ix2; + static ulong in, *iu[2 * MAX_BIT + 1]; + static ulong mdeg[3] = { 0, 1, 2 }; + static ulong ip[3] = { 0, 0, 1 }; + static ulong iv[2 * MAX_BIT + 1] = + { 0, 1, 1, 1, 1, 1, 1, 3, 1, 3, 3, 1, 1, 5, 7, 7, 3, 3, 5, 15, 11, 5, 15, 13, 9 }; + + if ( !init ) + { + init = 1; + for ( j = 1, k = 0; j <= MAX_BIT; j++, k += 2 ) + { + iu[j] = &iv[k]; + } + + for ( k = 1; k <= 2; k++ ) + { + for ( j = 1; j <= ( int ) mdeg[k]; j++ ) + { + iu[j][k] <<= ( MAX_BIT - j ); + } + + for ( j = mdeg[k] + 1; j <= MAX_BIT; j++ ) + { + ipp = ip[k]; + i = iu[j - mdeg[k]][k]; + i ^= ( i >> mdeg[k] ); + + for ( l = mdeg[k] - 1; l >= 1; l-- ) + { + if ( ipp & 1 ) + { + i ^= iu[j - l][k]; + } + + ipp >>= 1; + } + + iu[j][k] = i; + } + } + + fac = 1.0 / ( 1L << MAX_BIT ); + in = 0; + } + + im = in; + for ( j = 1; j <= MAX_BIT; j++ ) + { + if ( ! ( im & 1 ) ) + { + break; + } + + im >>= 1; + } + + im = (j - 1) * 2; + *x = (ix1 ^= iv[im + 1]) * fac; + *y = (ix2 ^= iv[im + 2]) * fac; + in++; +} + +#undef MAX_BIT + +/* + Function to determine if an integer is a power of 2: + http://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2 +*/ + +bool +is_pow2 ( const int x ) +{ + uint ux = ( uint ) x; + + return ux && !( ux & ( ux - 1 ) ); +} + +/* + Online K-Means Algorithm: + S. Thompson, M. E. Celebi, and K. H. Buck, + Fast Color Quantization Using MacQueen’s K-Means Algorithm, + Journal of Real-Time Image Processing, + 17(5): 1609-1624, 2020. + + Notes: + 1) LR_EXP: Learning rate exponent (must be in [0.5, 1]) + 2) SAMPLE_RATE: Fraction of the input pixels (must be in (0, 1]) + used during the clustering process. + 3) CLUST: When the function is called, CLUST represents the initial + centers. Upon return, CLUST represents the final centers. + */ + +void +online_kmeans ( const Image *img, const int num_colors, const double lr_exp, + const double sample_rate, RGB_Cluster *clust ) +{ + int min_dist_index; + int old_size, new_size; + int row_idx, col_idx; + int num_samples; + double sob_x, sob_y; + double del_red, del_green, del_blue; + double dist, min_dist; + double learn_rate; + Color rand_pixel; + + if ( lr_exp < 0.5 || lr_exp > 1. ) + { + fprintf ( stderr, "Learning rate exponent (%g) must be in [0.5, 1]\n", lr_exp ); + exit ( EXIT_FAILURE ); + } + else if ( sample_rate <= 0.0 || sample_rate > 1. ) + { + fprintf ( stderr, "Sampling rate (%g) must be in (0, 1]\n", sample_rate ); + exit ( EXIT_FAILURE ); + } + + num_samples = ( int ) ( sample_rate * (img->width*img->height) + 0.5 ); /* round */ + + for ( int i = 0; i < num_samples; i++ ) + { + /* Sample the image quasirandomly based on a Sobol' sequence */ + sob_seq ( &sob_x, &sob_y ); + + /* Find the corresponding row/column indices */ + row_idx = ( int ) ( sob_y * img->height + 0.5 ); /* round */ + if ( row_idx == img->height ) + { + row_idx--; + } + + col_idx = ( int ) ( sob_x * img->width + 0.5 ); /* round */ + if ( col_idx == img->width ) + { + col_idx--; + } + + rand_pixel = img->data[row_idx * img->width + col_idx]; + + /* Find the nearest center */ + min_dist = MAX_RGB_DIST; + min_dist_index = -INT_MAX; + for ( int j = 0; j < num_colors; j++ ) + { + del_red = clust[j].center.r - rand_pixel.r; + del_green = clust[j].center.g - rand_pixel.g; + del_blue = clust[j].center.b - rand_pixel.b; + + dist = del_red * del_red + del_green * del_green + del_blue * del_blue; + if ( dist < min_dist ) + { + min_dist = dist; + min_dist_index = j; + } + } + + /* Update the size of the nearest cluster */ + old_size = clust[min_dist_index].size; + new_size = old_size + 1; + + /* Compute the learning rate */ + learn_rate = pow ( new_size, -lr_exp ); + + /* Update the center of the nearest cluster */ + clust[min_dist_index].center.r += learn_rate * + ( rand_pixel.r - clust[min_dist_index].center.r ); + clust[min_dist_index].center.g += learn_rate * + ( rand_pixel.g - clust[min_dist_index].center.g ); + clust[min_dist_index].center.b += learn_rate * + ( rand_pixel.b - clust[min_dist_index].center.b ); + clust[min_dist_index].size = new_size; + } +} + +/* Function to compute the centroid of an image */ + +RGB_Cluster +compute_centroid ( const Image *img ) +{ + double sum_red = 0.0, sum_green = 0.0, sum_blue = 0.0; + Color pixel; + RGB_Cluster centroid; + + for (int i = 0; i < (img->width*img->height); i++) + { + pixel = img->data[i]; + sum_red += pixel.r; + sum_green += pixel.g; + sum_blue += pixel.b; + } + + centroid.center.r = sum_red / (img->width*img->height); + centroid.center.g = sum_green / (img->width*img->height); + centroid.center.b = sum_blue / (img->width*img->height); + + return centroid; +} + +/* + Incremental Online K-Means Algorithm: + + A. D. Abernathy and M. E. Celebi, + The Incremental Online K-Means Clustering Algorithm + and Its Application to Color Quantization, + Expert Systems with Applications, + accepted for publication, 2022. + + Notes: + 1) NUM_COLORS must be a power of 2 (otherwise the code must be + modified slightly, see Abernathy & Celebi, 2022). + 2) LR_EXP: Learning rate exponent (must be in [0.5, 1]) + 3) SAMPLE_RATE: Fraction of the input pixels (must be in (0, 1]) + used during the clustering process. + */ + +std::vector +inc_online_kmeans ( const Image *img, const int num_colors, + const double lr_exp, const double sample_rate ) +{ + int index, num_splits; + Color pixel; + RGB_Cluster *tmp_clust, *clust; + + if ( !is_pow2 ( num_colors ) ) + { + fprintf ( stderr, "Number of colors (%d) must be a power of 2!\n", num_colors ); + exit ( EXIT_FAILURE ); + } + + /* Compute log2 ( num_colors ) */ + num_splits = ( int ) ( log ( num_colors ) / log ( 2 ) + 0.5 ); /* round */ + + tmp_clust = ( RGB_Cluster * ) malloc ( ( 2 * num_colors - 1 ) * sizeof ( RGB_Cluster ) ); + clust = ( RGB_Cluster * ) malloc ( num_colors * sizeof ( RGB_Cluster ) ); + + /* Set first center to be the dataset centroid */ + tmp_clust[0] = compute_centroid ( img ); + tmp_clust[0].size = 0; + + for ( int t = 0; t < num_splits; t++ ) + { + for ( int n = POW2[t] - 1; n < POW2[t + 1] - 1; n++ ) + { + /* Split c_n into c_{2n + 1} and c_{2n + 2} */ + pixel = tmp_clust[n].center; + + /* Left child */ + index = 2 * n + 1; + tmp_clust[index].center.r = pixel.r; + tmp_clust[index].center.g = pixel.g; + tmp_clust[index].center.b = pixel.b; + tmp_clust[index].size = 0; + + /* Right child */ + index++; + tmp_clust[index].center.r = pixel.r; + tmp_clust[index].center.g = pixel.g; + tmp_clust[index].center.b = pixel.b; + tmp_clust[index].size = 0; + } + + /* Refine the new centers using online k-means */ + online_kmeans ( img, POW2[t + 1], lr_exp, sample_rate, + tmp_clust + POW2[t + 1] - 1 ); + } + + /* Last NUM_COLORS centers are the final centers */ + for ( int j = 0; j < num_colors; j++ ) + { + clust[j].center.r = tmp_clust[j + num_colors - 1].center.r; + clust[j].center.g = tmp_clust[j + num_colors - 1].center.g; + clust[j].center.b = tmp_clust[j + num_colors - 1].center.b; + } + + free (tmp_clust); + + std::vector result(num_colors); + for (int i=0; i -#include "libimagequant.h" #include "header.hpp" #include "Timer.h" #include "fastlz.h" @@ -50,9 +49,9 @@ int main(int argc, char **argv) .store_into(opt.path_output); program.add_argument("-f", "--format") - .help("texture format { rgb256, rgb16, indexed4, indexed16, indexed256, indexed32a3, indexed8a5 }") + .help("texture format { rgb256, rgb16, indexed4, indexed16, indexed256, indexed32a8, indexed8a32 }") .default_value("rgb16") - .choices("rgb256", "rgb16", "indexed4", "indexed16", "indexed256", "indexed32a3", "indexed8a5") + .choices("rgb256", "rgb16", "indexed4", "indexed16", "indexed256", "indexed32a8", "indexed8a32") .nargs(1) .store_into(opt.format); } @@ -96,10 +95,10 @@ int main(int argc, char **argv) meta.height = img.height; meta.palette_count = 0; meta.palette_hash = 0; - meta.original_size = img.width*img.height*img.comp; + meta.original_size = img.width*img.height*4; uint8_t compress[uint64_t(meta.original_size*1.5)]; - meta.compress_size = fastlz_compress_level(1, img.data, meta.original_size, compress); + meta.compress_size = fastlz_compress_level(1, img.data.data(), meta.original_size, compress); auto output = fs::path(opt.path_output) / std::format("{}.sillyimg", fs::path(input).stem().string()); std::ofstream out(output, std::ios::binary); @@ -119,14 +118,7 @@ int main(int argc, char **argv) // convert into RGB16 color for (int i=0; i palette; std::vector images(opt.path_input.size()); - std::vector indexes(opt.path_input.size()); for (int i=0; i 1) { - uint32_t length = indexes[i].width*indexes[i].height; - if (bpp < 8) BitPacking(bpp, indexes[i], length); + int combined_size = 0; + Image combined; + + // pre-calclate size + for (auto &image : images) + combined_size += image.data.size(); + + combined.data.reserve(combined_size); + + for (const auto& image : images) + combined.data.insert(combined.data.end(), image.data.begin(), image.data.end()); + + palette = QuantizePalette(numcol, combined); + } + else palette = QuantizePalette(numcol, images[0]); + + uint16_t palette16[palette.size()]; + for (int i=0; i(palette16), sizeof(uint16_t) * palette.size(), 0); + + for (int i=0; i remap = RemapImage(images[i], palette, format); + // std::vector remap = {1,2,3,4,54,5,6,7,78,23,34,32,32,12,5,34}; Metadata meta; - meta.format = Format::Format_RGB_16; - meta.width = indexes[i].width; - meta.height = indexes[i].height; - meta.palette_count = numcol; - meta.palette_hash = murmurhash(reinterpret_cast(palette16), sizeof(uint16_t) * palette.size(), 1); - meta.original_size = length; + meta.format = format; + meta.width = images[i].width; + meta.height = images[i].height; + meta.palette_count = palette.size(); + meta.palette_hash = hash; + meta.original_size = remap.size(); uint8_t compress[uint64_t(meta.original_size*1.5)]; - meta.compress_size = fastlz_compress_level(1, indexes[i].data, meta.original_size, compress); + meta.compress_size = fastlz_compress_level(1, remap.data(), meta.original_size, compress); auto output = fs::path(opt.path_output) / std::format("{}.sillyimg", fs::path(opt.path_input[i]).stem().string()); - std::ofstream out(output, std::ios::binary); + // std::ofstream out(output, std::ios::binary); - out.write(reinterpret_cast(&meta), sizeof(meta)); - out.write(reinterpret_cast(palette16), sizeof(uint16_t) * palette.size()); - out.write(reinterpret_cast(compress), meta.compress_size); - out.close(); + // out.write(reinterpret_cast(&meta), sizeof(meta)); + // out.write(reinterpret_cast(palette16), sizeof(uint16_t) * palette.size()); + // out.write(reinterpret_cast(compress), meta.compress_size); + // out.close(); } } - - if (opt.format == "indexed32a3" || opt.format == "indexed8a5") - throw std::runtime_error("format not supported yet!"); - } catch(const std::exception &e) { @@ -228,125 +248,134 @@ int main(int argc, char **argv) // // ---- -uint16_t RGB16(uint8_t a, uint8_t r, uint8_t g, uint8_t b) +std::vector QuantizePalette(const int numColor, const Image &image) { - // TODO: alpha threshold - uint16_t a1 = a & 0x01; - uint16_t r5 = (r >> 3) & 0x1F; - uint16_t g5 = (g >> 3) & 0x1F; - uint16_t b5 = (b >> 3) & 0x1F; - - return (a1 << 15) | (r5) | (g5 << 5) | (b5 << 10); + return inc_online_kmeans(&image, numColor); } -void QuantizeImage(const int numColor, std::vector &images, std::vector &palette, std::vector &indexes) +std::vector RemapImage(const Image &image, const std::vector &palette, const int &format) { - liq_attr *liq_attr; - liq_result *liq_result; - liq_histogram *liq_histogram; - const liq_palette *liq_palette; + std::vector remap(image.width*image.height); - liq_attr = liq_attr_create(); - liq_histogram = liq_histogram_create(liq_attr); + int min_dist_index; + double del_red, del_green, del_blue; + double dist, min_dist; - liq_set_speed(liq_attr, 1); - liq_set_max_colors(liq_attr, numColor); - - std::vector liq_images(images.size()); - - for (int i=0; icount); - for(int i=0; icount; i++) - palette[i] = { - liq_palette->entries[i].a, - liq_palette->entries[i].r, - liq_palette->entries[i].g, - liq_palette->entries[i].b }; - - for (int i=0; i(liq_err), indexes[i].width, indexes[i].height)); - } - - for (auto &i : liq_images) - liq_image_destroy(i); - - liq_attr_destroy(liq_attr); - liq_histogram_destroy(liq_histogram); - liq_result_destroy(liq_result); -} - -void BitPacking(const uint8_t bpp, IndexedImage &img, uint32_t &length) -{ - length = ((img.width*img.height) * bpp) / 8; - uint8_t *buffer = new uint8_t[length](); - uint32_t bufferPos = 0; - uint8_t bitpos = 0; - - for (int i2=0; i2<(img.width*img.height); i2++) - { - buffer[bufferPos] |= img.data[i2] << bitpos; - bitpos += bpp; - - if (bitpos == 8) + for (int j=0; j packed; + packed.reserve((remap.size() + 3) / 4); // Round up + + for (size_t i = 0; i < remap.size(); ) { + uint8_t byte = 0; + byte |= (remap[i] & 0b00000011); + + if (++i < remap.size()) byte |= (remap[i] & 0b00000011) << 2; + if (++i < remap.size()) byte |= (remap[i] & 0b00000011) << 4; + if (++i < remap.size()) byte |= (remap[i] & 0b00000011) << 6; + + packed.push_back(byte); + i++; // Move to next group + } + + return packed; + } + + else if (format == Format_INDEXED_16) + { + std::vector packed; + packed.reserve((remap.size() + 1) / 2); // Round up + + for (int i = 0; i < remap.size(); ) { + uint8_t byte = 0; + byte |= (remap[i] & 0b00001111); + + if (++i < remap.size()) { + byte |= (remap[i] & 0b00001111) << 4; + } + + packed.push_back(byte); + i++; // Move to next group + } + + return packed; + } + + else if (format == Format_INDEXED_8A32) + { + std::vector packed; + packed.reserve(remap.size()); + + for (size_t i = 0; i < remap.size(); ++i) { + uint8_t color_index = remap[i] & 0b00000111; // 3-bit mask + uint8_t alpha = ((uint8_t)palette[remap[i]].a >> 3) & 0b00011111; // 5-bit mask + + packed.push_back((alpha << 3) | color_index); + } + + return packed; + } + + else if (format == Format_INDEXED_32A8) + { + std::vector packed; + packed.reserve(remap.size()); + + for (size_t i = 0; i < remap.size(); ++i) { + uint8_t color_index = remap[i] & 0b00011111; // 5-bit mask + uint8_t alpha = ((uint8_t)palette[remap[i]].a >> 5) & 0b00000111; // 3-bit mask + + packed.push_back((alpha << 5) | color_index); + } + + return packed; + } + + return remap; } // // ---- -Image::Image(const std::filesystem::path &path) -{ - Load(path); -} - -Image::~Image() -{ - stbi_image_free(data); -} - void Image::Load(const std::filesystem::path &path) { - data = stbi_load(path.c_str(), &width, &height, &comp, 4); - if (data == nullptr) throw std::runtime_error(stbi_failure_reason()); -} + int comp; + stbi_uc *img = stbi_load(path.c_str(), &width, &height, &comp, 4); + if (img == nullptr) throw std::runtime_error(stbi_failure_reason()); -// // ---- + data.resize(width*height); + for (int i=0; iwidth = width; - this->height = height; - - data = new uint8_t[width*height]; - if (data == nullptr) throw std::runtime_error("not enough memory"); + stbi_image_free(img); } \ No newline at end of file