From 82cc0b60b71d61b0eb0f63c846ea2c83f53a63f7 Mon Sep 17 00:00:00 2001
From: sillysagiri <sillysagiri@privacyrequired.com>
Date: Fri, 16 May 2025 11:20:11 +0700
Subject: [PATCH] using online k-means to quantize image

---
 .gitignore     |   3 +-
 CMakeLists.txt |   1 -
 readme.txt     |   3 +-
 src/header.hpp |  62 +++++----
 src/kmean.cpp  | 341 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/main.cpp   | 321 +++++++++++++++++++++++++---------------------
 6 files changed, 549 insertions(+), 182 deletions(-)
 create mode 100644 src/kmean.cpp
diff --git a/.gitignore b/.gitignore
index d73e005..3ded315 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .cache
-build
\ No newline at end of file
+build
+.continue
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 85931fc..a5968df 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -12,7 +12,6 @@ project(${PROJECT_NAME} VERSION ${PROJECT_VERSION})
 
 list(APPEND CMAKE_MODULE_PATH "/opt/cmake")
 
-find_package(LIQ)
 find_package(STB)
 
 include(FetchContent)
diff --git a/readme.txt b/readme.txt
index ce21890..cc566f4 100644
--- a/readme.txt
+++ b/readme.txt
@@ -46,7 +46,7 @@ binary format:
 
 TODO:
   [x] bitpacking for indexed format
-  [ ] bitpacking for alpha indexed format
+  [x] bitpacking for alpha indexed format
   [ ] convert image using predefined palette
   [ ] output preview image
   [ ] improve error handling
@@ -55,7 +55,6 @@ TODO:
 dependencies:
   argparse (https://github.com/p-ranav/argparse)
   stb_image (https://github.com/nothings/stb)
-  libimagequant (https://github.com/ImageOptim/libimagequant)
   murmurhash.c (https://github.com/jwerle/murmurhash.c)
   Timer.h from cherno (https://gist.github.com/TheCherno/b2c71c9291a4a1a29c889e76173c8d14)
 
diff --git a/src/header.hpp b/src/header.hpp
index 9da5c26..7fb4b94 100644
--- a/src/header.hpp
+++ b/src/header.hpp
@@ -41,39 +41,32 @@ enum Format
 	Format_INDEXED_4,
 	Format_INDEXED_16,
 	Format_INDEXED_256,
-	Format_INDEXED_32A3,
-	Format_INDEXED_8A5,
+	Format_INDEXED_32A8,
+	Format_INDEXED_8A32,
 	Format_PALETTE_16,
 };
 
-struct Image {
-	int width, height, comp;
-	stbi_uc *data;
-
-	constexpr Image() : width(0), height(0), comp(0), data(nullptr) {}
-	Image(const std::filesystem::path &path);
-	~Image();
-
-	void Load(const std::filesystem::path &path);
-};
-
-struct IndexedImage
-{
-	int width, height;
-	uint8_t *data;
-
-	constexpr IndexedImage() : width(0), height(0), data(nullptr) {}
-	IndexedImage(int width, int height);
-	~IndexedImage();
-
-	void Load(int width, int height);
-};
-
 struct Color {
-	uint8_t a;
-	uint8_t r;
-	uint8_t g;
-	uint8_t b;
+	double r = 0;
+	double g = 0;
+	double b = 0;
+	double a = 255;
+
+	inline uint16_t toRGB16(uint8_t alpha_threshold = 128) const {
+		const uint16_t alpha_bit = (a >= alpha_threshold) ? 0b1000000000000000 : 0;
+		return alpha_bit
+				| (((uint8_t)r >> 3) & 0b00011111)
+				| (((uint8_t)g >> 3) & 0b00011111) << 5
+				| (((uint8_t)b >> 3) & 0b00011111) << 10;
+	}
+};
+
+struct Image {
+	int width = 0, height = 0;
+	std::vector<Color> data;
+
+	~Image() {}
+	void Load(const std::filesystem::path &path);
 };
 
 struct Options {
@@ -85,6 +78,11 @@ struct Options {
 void Verify();
 void Convert();
 
-uint16_t RGB16(uint8_t a, uint8_t r, uint8_t g, uint8_t b);
-void QuantizeImage(const int numColor, std::vector<Image> &images, std::vector<Color> &palette, std::vector<IndexedImage> &indexes);
-void BitPacking(const uint8_t bpp, IndexedImage &img, uint32_t &length);
\ No newline at end of file
+std::vector<Color> QuantizePalette(const int numColor, const Image &image);
+std::vector<uint8_t> RemapImage(const Image &image, const std::vector<Color> &palette, const int &format);
+
+std::vector<Color> inc_online_kmeans ( const Image *img, const int num_colors, 
+		    const double lr_exp = 0.5, const double sample_rate = 0.5);
+
+/* Max. L_2^2 distance in 24-bit RGB space = 3 * 255 * 255 */
+#define MAX_RGB_DIST 195075
\ No newline at end of file
diff --git a/src/kmean.cpp b/src/kmean.cpp
new file mode 100644
index 0000000..5f4ac97
--- /dev/null
+++ b/src/kmean.cpp
@@ -0,0 +1,341 @@
+/*
+https://github.com/AmberAbernathy/Color_Quantization/blob/main/test_km_algs.cpp
+
+Online K-Means (MacQueen, 1967)
+Incremental Online K-Means (Abernathy & Celebi, 2022)
+
+Authors: Amber Abernathy & M. Emre Celebi
+
+Contact email: ecelebi@uca.edu
+
+If you find this program useful, please cite:
+A. D. Abernathy and M. E. Celebi, 
+The Incremental Online K-Means Clustering Algorithm 
+and Its Application to Color Quantization,
+Expert Systems with Applications,
+in press, https://doi.org/10.1016/j.eswa.2022.117927, 2022.
+*/
+
+#include <climits>
+#include <math.h>
+#include "header.hpp"
+
+typedef unsigned char uchar;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+
+struct RGB_Cluster
+{
+ int size;
+ Color center;
+};
+
+/* Max. # colors that can be requested */
+#define MAX_NUM_COLORS 256
+
+/* 
+  Powers of two for 0, 1, ..., 16. Note that 2^16 must equal MAX_NUM_COLORS.
+  If you want to quantize to more than MAX_NUM_COLORS colors, extend the POW2 
+  array accordingly.
+ */
+static inline int POW2[] = { 1, 2, 4, 8, 16, 32, 64, 128, 256 };
+
+/*
+  Function to generate two quasirandom numbers from 
+  a 2D Sobol sequence. Adapted from Numerical Recipies 
+  in C. Upon return, X and Y fall in [0,1).
+ */
+
+#define MAX_BIT 30
+
+void 
+sob_seq ( double *x, double *y )
+{
+ int j, k, l;
+ ulong i, im, ipp;
+ static double fac;
+ static int init = 0;
+ static ulong ix1, ix2;
+ static ulong in, *iu[2 * MAX_BIT + 1];
+ static ulong mdeg[3] = { 0, 1, 2 };
+ static ulong ip[3] = { 0, 0, 1 };
+ static ulong iv[2 * MAX_BIT + 1] =
+ { 0, 1, 1, 1, 1, 1, 1, 3, 1, 3, 3, 1, 1, 5, 7, 7, 3, 3, 5, 15, 11, 5, 15, 13, 9 };
+
+ if ( !init )
+  {
+   init = 1;
+   for ( j = 1, k = 0; j <= MAX_BIT; j++, k += 2 )
+    {
+     iu[j] = &iv[k];
+    }
+
+   for ( k = 1; k <= 2; k++ )
+    {
+     for ( j = 1; j <= ( int ) mdeg[k]; j++ )
+      {
+       iu[j][k] <<= ( MAX_BIT - j );
+      }
+
+     for ( j = mdeg[k] + 1; j <= MAX_BIT; j++ )
+      {
+       ipp = ip[k];
+       i = iu[j - mdeg[k]][k];
+       i ^= ( i >> mdeg[k] );
+
+       for ( l = mdeg[k] - 1; l >= 1; l-- )
+        {
+ 	 if ( ipp & 1 )
+ 	  {
+  	   i ^= iu[j - l][k];
+	  }
+
+	 ipp >>= 1;
+	}
+
+       iu[j][k] = i;
+      }
+    }
+
+   fac = 1.0 / ( 1L << MAX_BIT );
+   in = 0;
+  }
+
+ im = in;
+ for ( j = 1; j <= MAX_BIT; j++ )
+  {
+   if ( ! ( im & 1 ) )
+    {
+     break;
+    }
+
+   im >>= 1;
+  }
+
+ im = (j - 1) * 2;
+ *x = (ix1 ^= iv[im + 1]) * fac;
+ *y = (ix2 ^= iv[im + 2]) * fac;
+ in++;
+}
+
+#undef MAX_BIT
+
+/*
+  Function to determine if an integer is a power of 2:
+  http://graphics.stanford.edu/~seander/bithacks.html#DetermineIfPowerOf2
+*/
+
+bool
+is_pow2 ( const int x )
+{ 
+ uint ux = ( uint ) x;
+
+ return ux && !( ux & ( ux - 1 ) );
+}
+
+/* 
+  Online K-Means Algorithm:
+  S. Thompson, M. E. Celebi, and K. H. Buck, 
+  Fast Color Quantization Using MacQueen’s K-Means Algorithm, 
+  Journal of Real-Time Image Processing, 
+  17(5): 1609-1624, 2020.  
+
+  Notes:
+        1) LR_EXP: Learning rate exponent (must be in [0.5, 1])
+        2) SAMPLE_RATE: Fraction of the input pixels (must be in (0, 1]) 
+	   used during the clustering process.
+	3) CLUST: When the function is called, CLUST represents the initial 
+	   centers. Upon return, CLUST represents the final centers.
+ */
+
+void 
+online_kmeans ( const Image *img, const int num_colors, const double lr_exp, 
+		const double sample_rate, RGB_Cluster *clust )
+{
+ int min_dist_index;
+ int old_size, new_size;
+ int row_idx, col_idx;
+ int num_samples;
+ double sob_x, sob_y;
+ double del_red, del_green, del_blue;
+ double dist, min_dist;
+ double learn_rate;
+ Color rand_pixel;
+
+ if ( lr_exp < 0.5 || lr_exp > 1. )
+  {
+   fprintf ( stderr, "Learning rate exponent (%g) must be in [0.5, 1]\n", lr_exp );
+   exit ( EXIT_FAILURE );
+  }
+ else if ( sample_rate <= 0.0 || sample_rate > 1. )
+  {
+   fprintf ( stderr, "Sampling rate (%g) must be in (0, 1]\n", sample_rate );
+   exit ( EXIT_FAILURE );
+  }
+
+ num_samples = ( int ) ( sample_rate * (img->width*img->height) + 0.5 ); /* round */
+	
+ for ( int i = 0; i < num_samples; i++ ) 
+  {
+   /* Sample the image quasirandomly based on a Sobol' sequence */
+   sob_seq ( &sob_x, &sob_y );
+
+   /* Find the corresponding row/column indices */
+   row_idx = ( int ) ( sob_y * img->height + 0.5 ); /* round */
+   if ( row_idx == img->height )
+    {
+     row_idx--;
+    }
+
+   col_idx = ( int ) ( sob_x * img->width + 0.5 ); /* round */
+   if ( col_idx == img->width )
+    {
+     col_idx--;
+    }
+
+   rand_pixel = img->data[row_idx * img->width + col_idx];
+
+   /* Find the nearest center */
+   min_dist = MAX_RGB_DIST;
+   min_dist_index = -INT_MAX;
+   for ( int j = 0; j < num_colors; j++ ) 
+    {
+     del_red = clust[j].center.r - rand_pixel.r;
+     del_green = clust[j].center.g - rand_pixel.g;
+     del_blue = clust[j].center.b - rand_pixel.b;
+
+     dist = del_red * del_red + del_green * del_green + del_blue * del_blue;
+     if ( dist < min_dist ) 
+      {
+       min_dist = dist;
+       min_dist_index = j;
+      }
+    }
+
+   /* Update the size of the nearest cluster */
+   old_size = clust[min_dist_index].size;
+   new_size = old_size + 1;
+
+   /* Compute the learning rate */
+   learn_rate = pow ( new_size, -lr_exp );	
+
+   /* Update the center of the nearest cluster */
+   clust[min_dist_index].center.r += learn_rate * 
+	                               ( rand_pixel.r - clust[min_dist_index].center.r );
+   clust[min_dist_index].center.g += learn_rate * 
+	                                 ( rand_pixel.g - clust[min_dist_index].center.g );
+   clust[min_dist_index].center.b += learn_rate * 
+	                                ( rand_pixel.b - clust[min_dist_index].center.b );
+   clust[min_dist_index].size = new_size;
+  }
+}
+
+/* Function to compute the centroid of an image */
+
+RGB_Cluster 
+compute_centroid ( const Image *img ) 
+{
+ double sum_red = 0.0, sum_green = 0.0, sum_blue = 0.0;
+ Color pixel;
+ RGB_Cluster centroid;
+
+ for (int i = 0; i < (img->width*img->height); i++) 
+  {
+   pixel = img->data[i];
+   sum_red += pixel.r;
+   sum_green += pixel.g;
+   sum_blue += pixel.b;
+  }
+
+ centroid.center.r = sum_red / (img->width*img->height);
+ centroid.center.g = sum_green / (img->width*img->height);
+ centroid.center.b = sum_blue / (img->width*img->height);
+
+ return centroid;
+}
+
+/* 
+  Incremental Online K-Means Algorithm:
+
+  A. D. Abernathy and M. E. Celebi, 
+  The Incremental Online K-Means Clustering Algorithm 
+  and Its Application to Color Quantization,
+  Expert Systems with Applications,
+  accepted for publication, 2022.
+  
+  Notes:
+        1) NUM_COLORS must be a power of 2 (otherwise the code must be 
+	   modified slightly, see Abernathy & Celebi, 2022).
+        2) LR_EXP: Learning rate exponent (must be in [0.5, 1])
+        3) SAMPLE_RATE: Fraction of the input pixels (must be in (0, 1]) 
+	   used during the clustering process.
+ */
+
+std::vector<Color>
+inc_online_kmeans ( const Image *img, const int num_colors, 
+		    const double lr_exp, const double sample_rate )
+{
+ int index, num_splits;
+ Color pixel;
+ RGB_Cluster *tmp_clust, *clust;
+ 
+ if ( !is_pow2 ( num_colors ) )
+  {
+   fprintf ( stderr, "Number of colors (%d) must be a power of 2!\n", num_colors );
+   exit ( EXIT_FAILURE );
+  }
+
+ /* Compute log2 ( num_colors ) */
+ num_splits = ( int ) ( log ( num_colors ) / log ( 2 ) + 0.5 ); /* round */
+ 
+ tmp_clust = ( RGB_Cluster * ) malloc ( ( 2 * num_colors - 1 ) * sizeof ( RGB_Cluster ) );
+ clust = ( RGB_Cluster * ) malloc ( num_colors * sizeof ( RGB_Cluster ) );
+
+ /* Set first center to be the dataset centroid */
+ tmp_clust[0] = compute_centroid ( img );
+ tmp_clust[0].size = 0;
+
+ for ( int t = 0; t < num_splits; t++ )
+  {
+   for ( int n = POW2[t] - 1; n < POW2[t + 1] - 1; n++ )
+    {
+     /* Split c_n into c_{2n + 1} and c_{2n + 2} */
+     pixel = tmp_clust[n].center;
+
+     /* Left child */
+     index = 2 * n + 1;
+     tmp_clust[index].center.r = pixel.r;
+     tmp_clust[index].center.g = pixel.g;
+     tmp_clust[index].center.b = pixel.b;
+     tmp_clust[index].size = 0;
+	
+     /* Right child */
+     index++;
+     tmp_clust[index].center.r = pixel.r;
+     tmp_clust[index].center.g = pixel.g;
+     tmp_clust[index].center.b = pixel.b;
+     tmp_clust[index].size = 0;
+    }
+
+   /* Refine the new centers using online k-means */
+   online_kmeans ( img, POW2[t + 1], lr_exp, sample_rate, 
+		   tmp_clust + POW2[t + 1] - 1 );
+  }
+	
+ /* Last NUM_COLORS centers are the final centers */
+ for ( int j = 0; j < num_colors; j++ ) 
+  {
+   clust[j].center.r = tmp_clust[j + num_colors - 1].center.r;
+   clust[j].center.g = tmp_clust[j + num_colors - 1].center.g;
+   clust[j].center.b = tmp_clust[j + num_colors - 1].center.b;
+  }
+
+ free (tmp_clust);
+
+ std::vector<Color> result(num_colors);
+ for (int i=0; i<num_colors; i++)
+  result[i] = clust[i].center;
+
+ free(clust);
+ return result;
+}
\ No newline at end of file
diff --git a/src/main.cpp b/src/main.cpp
index bcda7e0..a567040 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -22,7 +22,6 @@
 #define STB_IMAGE_WRITE_IMPLEMENTATION
 
 #include <argparse/argparse.hpp>
-#include "libimagequant.h"
 #include "header.hpp"
 #include "Timer.h"
 #include "fastlz.h"
@@ -50,9 +49,9 @@ int main(int argc, char **argv)
 			.store_into(opt.path_output);
 
 		program.add_argument("-f", "--format")
-			.help("texture format { rgb256, rgb16, indexed4, indexed16, indexed256, indexed32a3, indexed8a5 }")
+			.help("texture format { rgb256, rgb16, indexed4, indexed16, indexed256, indexed32a8, indexed8a32 }")
 			.default_value<std::string>("rgb16")
-			.choices("rgb256", "rgb16", "indexed4", "indexed16", "indexed256", "indexed32a3", "indexed8a5")
+			.choices("rgb256", "rgb16", "indexed4", "indexed16", "indexed256", "indexed32a8", "indexed8a32")
 			.nargs(1)
 			.store_into(opt.format);
 	}
@@ -96,10 +95,10 @@ int main(int argc, char **argv)
 				meta.height = img.height;
 				meta.palette_count = 0;
 				meta.palette_hash = 0;
-				meta.original_size = img.width*img.height*img.comp;
+				meta.original_size = img.width*img.height*4;
 
 				uint8_t compress[uint64_t(meta.original_size*1.5)];
-				meta.compress_size = fastlz_compress_level(1, img.data, meta.original_size, compress);
+				meta.compress_size = fastlz_compress_level(1, img.data.data(), meta.original_size, compress);
 
 				auto output = fs::path(opt.path_output) / std::format("{}.sillyimg", fs::path(input).stem().string());
 				std::ofstream out(output, std::ios::binary);
@@ -119,14 +118,7 @@ int main(int argc, char **argv)
 
 				// convert into RGB16 color
 				for (int i=0; i<img.width*img.height; i++)
-				{
-					uint8_t a = img.data[i*4+3];
-					uint8_t r = img.data[i*4+0];
-					uint8_t g = img.data[i*4+1];
-					uint8_t b = img.data[i*4+2];
-
-					img16[i] = RGB16(a, r, g, b);
-				}
+					img16[i] = img.data[i].toRGB16();
 
 				Metadata meta;
 				meta.format = Format::Format_RGB_16;
@@ -147,68 +139,96 @@ int main(int argc, char **argv)
 			}
 		}
 
-		if (opt.format == "indexed4" || opt.format == "indexed16" || opt.format == "indexed256")
+		if (opt.format == "indexed4" ||
+				opt.format == "indexed16" ||
+				opt.format == "indexed256" ||
+				opt.format == "indexed32a8" ||
+				opt.format == "indexed8a32")
 		{
 			uint16_t numcol;
-			uint16_t bpp;
+			uint8_t format;
 
 			if (opt.format == "indexed4")
 			{
 				numcol = 4;
-				bpp = 2;
+				format = Format_INDEXED_4;
 			}
-			if (opt.format == "indexed16")
+			else if (opt.format == "indexed16")
 			{
 				numcol = 16;
-				bpp = 4;
+				format = Format_INDEXED_16;
 			}
-			if (opt.format == "indexed256")
+			else if (opt.format == "indexed256")
 			{
 				numcol = 256;
-				bpp = 8;
+				format = Format_INDEXED_256;
+			}
+			else if (opt.format == "indexed32a8")
+			{
+				numcol = 32;
+				format = Format_INDEXED_32A8;
+			}
+			else if (opt.format == "indexed8a32")
+			{
+				numcol = 8;
+				format = Format_INDEXED_8A32;
 			}
 
 			std::vector<Color> palette;
 			std::vector<Image> images(opt.path_input.size());
-			std::vector<IndexedImage> indexes(opt.path_input.size());
 			
 			for (int i=0; i<images.size(); i++)
 				images[i].Load(opt.path_input[i]);
 
-			QuantizeImage(numcol, images, palette, indexes);
-			uint16_t palette16[palette.size()];
-			for (const auto &pal : palette)
-				uint16_t rgb16 = RGB16(pal.a, pal.r, pal.g, pal.b);
-
-			for (int i=0; i<indexes.size(); i++)
+			if (images.size() > 1)
 			{
-				uint32_t length = indexes[i].width*indexes[i].height;
-				if (bpp < 8) BitPacking(bpp, indexes[i], length);
+				int combined_size = 0;
+				Image combined;
+				
+				// pre-calclate size
+				for (auto &image : images)
+					combined_size += image.data.size();
+
+				combined.data.reserve(combined_size);
+
+				for (const auto& image : images)
+					combined.data.insert(combined.data.end(), image.data.begin(), image.data.end());
+
+				palette = QuantizePalette(numcol, combined);
+			}
+			else palette = QuantizePalette(numcol, images[0]);
+
+			uint16_t palette16[palette.size()];
+			for (int i=0; i<palette.size(); i++)
+				palette16[i] = palette[i].toRGB16();
+
+			uint32_t hash = murmurhash(reinterpret_cast<const char*>(palette16), sizeof(uint16_t) * palette.size(), 0);
+
+			for (int i=0; i<images.size(); i++)
+			{
+				std::vector<uint8_t> remap = RemapImage(images[i], palette, format);
+				// std::vector<uint8_t> remap = {1,2,3,4,54,5,6,7,78,23,34,32,32,12,5,34};
 
 				Metadata meta;
-				meta.format = Format::Format_RGB_16;
-				meta.width = indexes[i].width;
-				meta.height = indexes[i].height;
-				meta.palette_count = numcol;
-				meta.palette_hash = murmurhash(reinterpret_cast<const char*>(palette16), sizeof(uint16_t) * palette.size(), 1);
-				meta.original_size = length;
+				meta.format = format;
+				meta.width = images[i].width;
+				meta.height = images[i].height;
+				meta.palette_count = palette.size();
+				meta.palette_hash = hash;
+				meta.original_size = remap.size();
 
 				uint8_t compress[uint64_t(meta.original_size*1.5)];
-				meta.compress_size = fastlz_compress_level(1, indexes[i].data, meta.original_size, compress);
+				meta.compress_size = fastlz_compress_level(1, remap.data(), meta.original_size, compress);
 
 				auto output = fs::path(opt.path_output) / std::format("{}.sillyimg", fs::path(opt.path_input[i]).stem().string());
-				std::ofstream out(output, std::ios::binary);
+				// std::ofstream out(output, std::ios::binary);
 				
-				out.write(reinterpret_cast<const char*>(&meta), sizeof(meta));
-				out.write(reinterpret_cast<const char*>(palette16), sizeof(uint16_t) * palette.size());
-				out.write(reinterpret_cast<const char*>(compress), meta.compress_size);
-				out.close();
+				// out.write(reinterpret_cast<const char*>(&meta), sizeof(meta));
+				// out.write(reinterpret_cast<const char*>(palette16), sizeof(uint16_t) * palette.size());
+				// out.write(reinterpret_cast<const char*>(compress), meta.compress_size);
+				// out.close();
 			}
 		}
-
-		if (opt.format == "indexed32a3" || opt.format == "indexed8a5")
-			throw std::runtime_error("format not supported yet!");
-
 	}
 	catch(const std::exception &e)
 	{
@@ -228,125 +248,134 @@ int main(int argc, char **argv)
 
 // // ----
 
-uint16_t RGB16(uint8_t a, uint8_t r, uint8_t g, uint8_t b)
+std::vector<Color> QuantizePalette(const int numColor, const Image &image)
 {
-	// TODO: alpha threshold
-	uint16_t a1 = a & 0x01;
-	uint16_t r5 = (r >> 3) & 0x1F;
-	uint16_t g5 = (g >> 3) & 0x1F;
-	uint16_t b5 = (b >> 3) & 0x1F;
-
-	return (a1 << 15) | (r5) | (g5 << 5) | (b5 << 10);
+	return inc_online_kmeans(&image, numColor);
 }
 
-void QuantizeImage(const int numColor, std::vector<Image> &images, std::vector<Color> &palette, std::vector<IndexedImage> &indexes)
+std::vector<uint8_t> RemapImage(const Image &image, const std::vector<Color> &palette, const int &format)
 {
-	liq_attr *liq_attr;
-	liq_result *liq_result;
-	liq_histogram *liq_histogram;
-	const liq_palette *liq_palette;
+	std::vector<uint8_t> remap(image.width*image.height);
 
-	liq_attr = liq_attr_create();
-	liq_histogram = liq_histogram_create(liq_attr);
+	int min_dist_index;
+	double del_red, del_green, del_blue;
+	double dist, min_dist;
 
-	liq_set_speed(liq_attr, 1);
-	liq_set_max_colors(liq_attr, numColor);
-
-	std::vector<liq_image*> liq_images(images.size());
-
-	for (int i=0; i<images.size(); i++)
+	// TODO: explore more distance algorithm
+	for (int i=0; i<image.data.size(); i++)
 	{
-		liq_images[i] = liq_image_create_rgba(liq_attr, images[i].data, images[i].width, images[i].height, 0); 
-		liq_histogram_add_image(liq_histogram, liq_attr, liq_images[i]);
-	}
+		const auto &pixel = image.data[i];
+		min_dist = MAX_RGB_DIST;
+		min_dist_index = -INT_MAX;
 
-	auto liq_err = liq_histogram_quantize(liq_histogram, liq_attr, &liq_result);
-	liq_set_dithering_level(liq_result, 0);
-	liq_palette = liq_get_palette(liq_result);
-	assert(liq_err == LIQ_OK);
-
-	palette.resize(liq_palette->count);
-	for(int i=0; i<liq_palette->count; i++)
-		palette[i] = {
-			liq_palette->entries[i].a,
-			liq_palette->entries[i].r,
-			liq_palette->entries[i].g,
-			liq_palette->entries[i].b };
-
-	for (int i=0; i<images.size(); i++)
-	{
-		indexes[i].Load(images[i].width, images[i].height);
-		liq_err = liq_write_remapped_image(liq_result, liq_images[i], indexes[i].data, indexes[i].width*indexes[i].height);
-		if (liq_err != LIQ_OK) throw std::runtime_error(std::format("{} {} {}", static_cast<int>(liq_err), indexes[i].width, indexes[i].height));
-	}
-
-	for (auto &i : liq_images)
-		liq_image_destroy(i);
-
-	liq_attr_destroy(liq_attr);
-	liq_histogram_destroy(liq_histogram);
-	liq_result_destroy(liq_result);
-}
-
-void BitPacking(const uint8_t bpp, IndexedImage &img, uint32_t &length)
-{
-	length = ((img.width*img.height) * bpp) / 8;
-	uint8_t *buffer = new uint8_t[length]();
-	uint32_t bufferPos = 0;
-	uint8_t bitpos = 0;
-
-	for (int i2=0; i2<(img.width*img.height); i2++)
-	{
-		buffer[bufferPos] |= img.data[i2] << bitpos;
-		bitpos += bpp;
-
-		if (bitpos == 8)
+		for (int j=0; j<palette.size(); j++) 
 		{
-			bitpos = 0;
-			bufferPos++;
+			del_red = palette[j].r - pixel.r;
+			del_green = palette[j].g - pixel.g;
+			del_blue = palette[j].b - pixel.b;
+
+			dist = del_red * del_red + del_green * del_green + del_blue * del_blue;
+			if ( dist < min_dist ) 
+			{
+				min_dist = dist;
+				min_dist_index = j;
+			}
 		}
+
+		remap[i] = min_dist_index;
 	}
 
-	delete[] img.data;
-	img.data = buffer;
+	// ---
+
+	if (format == Format_INDEXED_4)
+	{
+		std::vector<uint8_t> packed;
+		packed.reserve((remap.size() + 3) / 4); // Round up
+		
+		for (size_t i = 0; i < remap.size(); ) {
+			uint8_t byte = 0;
+			byte |= (remap[i] & 0b00000011);
+			
+			if (++i < remap.size()) byte |= (remap[i] & 0b00000011) << 2;
+			if (++i < remap.size()) byte |= (remap[i] & 0b00000011) << 4;
+			if (++i < remap.size()) byte |= (remap[i] & 0b00000011) << 6;
+			
+			packed.push_back(byte);
+			i++; // Move to next group
+		}
+		
+		return packed;
+	}
+
+	else if (format == Format_INDEXED_16)
+	{
+		std::vector<uint8_t> packed;
+		packed.reserve((remap.size() + 1) / 2); // Round up
+
+		for (int i = 0; i < remap.size(); ) {
+			uint8_t byte = 0;
+			byte |= (remap[i] & 0b00001111);
+
+			if (++i < remap.size()) {
+					byte |= (remap[i] & 0b00001111) << 4;
+			}
+
+			packed.push_back(byte);
+			i++; // Move to next group
+		}
+
+		return packed;
+	}
+
+	else if (format == Format_INDEXED_8A32)
+	{
+		std::vector<uint8_t> packed;
+		packed.reserve(remap.size());
+
+		for (size_t i = 0; i < remap.size(); ++i) {
+			uint8_t color_index = remap[i] & 0b00000111;  // 3-bit mask
+			uint8_t alpha = ((uint8_t)palette[remap[i]].a >> 3) & 0b00011111; // 5-bit mask
+
+			packed.push_back((alpha << 3) | color_index);
+		}
+
+		return packed;
+	}
+
+	else if (format == Format_INDEXED_32A8)
+	{
+		std::vector<uint8_t> packed;
+		packed.reserve(remap.size());
+
+		for (size_t i = 0; i < remap.size(); ++i) {
+			uint8_t color_index = remap[i] & 0b00011111;  // 5-bit mask
+			uint8_t alpha = ((uint8_t)palette[remap[i]].a >> 5) & 0b00000111; // 3-bit mask
+
+			packed.push_back((alpha << 5) | color_index);
+		}
+
+		return packed;
+	}
+
+	return remap;
 }
 
 // // ----
 
-Image::Image(const std::filesystem::path &path)
-{
-	Load(path);
-}
-
-Image::~Image()
-{
-	stbi_image_free(data);
-}
-
 void Image::Load(const std::filesystem::path &path)
 {
-	data = stbi_load(path.c_str(), &width, &height, &comp, 4);
-	if (data == nullptr) throw std::runtime_error(stbi_failure_reason());
-}
+	int comp;
+	stbi_uc *img = stbi_load(path.c_str(), &width, &height, &comp, 4);
+	if (img == nullptr) throw std::runtime_error(stbi_failure_reason());
 
-// // ----
+	data.resize(width*height);
+	for (int i=0; i<width*height; i++) {
+		const int offset = i * 4;
+		data[i].r = img[offset];
+		data[i].g = img[offset+1];
+		data[i].b = img[offset+2];
+		data[i].a = img[offset+3];
+	}
 
-IndexedImage::IndexedImage(int width, int height)
-	: width(width), height(height), data(nullptr)
-{
-	Load(width, height);
-}
-
-IndexedImage::~IndexedImage()
-{
-	delete[] data; 
-}
-
-void IndexedImage::Load(int width, int height)
-{
-	this->width = width;
-	this->height = height;
-
-	data = new uint8_t[width*height];
-	if (data == nullptr) throw std::runtime_error("not enough memory");
+	stbi_image_free(img);
 }
\ No newline at end of file