我建议使用有序抖动(http://en.wikipedia.org/wiki/Ordered_dithering),因为Floyd-Steinberg 需要更多的处理和计算,只适用于静止图像/不能很好地用于动画或不变的显示.
我创建了自己优化的有序抖动,从24/32位RGB颜色到16位RGB565颜色,将tresshold分隔为子像素(在我的AROMA项目中使用).它比Floyd-Steinberg快得多,因为没有昂贵的计算(特别是没有乘法和div计算),并且能够用于动画,因为它使用了固定的tresshold.
它的质量也比在wiki上定义的有序抖动算法好得多.
这里是抖动结果的一个例子:

而这里的来源.请享用!
/* Dither Tresshold for Red Channel */
static const BYTE dither_tresshold_r[64] = {
1, 7, 3, 5, 0, 8, 2, 6,
7, 1, 5, 3, 8, 0, 6, 2,
3, 5, 0, 8, 2, 6, 1, 7,
5, 3, 8, 0, 6, 2, 7, 1,
0, 8, 2, 6, 1, 7, 3, 5,
8, 0, 6, 2, 7, 1, 5, 3,
2, 6, 1, 7, 3, 5, 0, 8,
6, 2, 7, 1, 5, 3, 8, 0
};
/* Dither Tresshold for Green Channel */
static const BYTE dither_tresshold_g[64] = {
1, 3, 2, 2, 3, 1, 2, 2,
2, 2, 0, 4, 2, 2, 4, 0,
3, 1, 2, 2, 1, 3, 2, 2,
2, 2, 4, 0, 2, 2, 0, 4,
1, 3, 2, 2, 3, 1, 2, 2,
2, 2, 0, 4, 2, 2, 4, 0,
3, 1, 2, 2, 1, 3, 2, 2,
2, 2, 4, 0, 2, 2, 0, 4
};
/* Dither Tresshold for Blue Channel */
static const BYTE dither_tresshold_b[64] = {
5, 3, 8, 0, 6, 2, 7, 1,
3, 5, 0, 8, 2, 6, 1, 7,
8, 0, 6, 2, 7, 1, 5, 3,
0, 8, 2, 6, 1, 7, 3, 5,
6, 2, 7, 1, 5, 3, 8, 0,
2, 6, 1, 7, 3, 5, 0, 8,
7, 1, 5, 3, 8, 0, 6, 2,
1, 7, 3, 5, 0, 8, 2, 6
};
/* Get 16bit closest color */
BYTE closest_rb(BYTE c) {
return (c >> 3 << 3); /* red & blue */
}
BYTE closest_g(BYTE c) {
return (c >> 2 << 2); /* green */
}
/* RGB565 */
WORD RGB16BIT(BYTE r, BYTE g, BYTE b) {
return ((WORD)((r>>3)<<11)|((g>>2)<<5)|(b>>3));
}
/* Dithering by individual subpixel */
WORD dither_xy(
int x,
int y,
BYTE r,
BYTE g,
BYTE b
){
/* Get Tresshold Index */
BYTE tresshold_id = ((y & 7) << 3) + (x & 7);
r = closest_rb(
MIN(r + dither_tresshold_r[tresshold_id], 0xff)
);
g = closest_g(
MIN(g + dither_tresshold_g[tresshold_id], 0xff)
);
b = closest_rb(
MIN(b + dither_tresshold_b[tresshold_id], 0xff)
);
return RGB16BIT(r, g, b);
}
/* Dithering Pixel from 32/24bit RGB
*
* GetR, GetG, GetB -> Function to get individual color in pixel
*
*/
WORD dither_color_xy(int x, int y, DWORD col) {
return dither_xy(x, y, GetR(col), GetG(col), GetB(col));
}
/* EXAMPLES */
void ExampleDither1(WORD * dest, DWORD * src, int width, int height){
int x, y;
for (y=0; y<height; y++){
for (x=0; x<width; x++){
int pos = y * width + x;
dest[pos] = dither_color_xy(x,y,src[pos]);
}
}
}
void ExampleDither2(WORD * dest, BYTE * src, int width, int height){
int x, y;
for (y=0; y<height; y++){
for (x=0; x<width; x++){
int pos = y * width + x;
dest[pos] = dither_xy(x,y,src[pos*3],src[pos*3+1],src[pos*3+2]);
}
}
}
Run Code Online (Sandbox Code Playgroud)
另一个结果(前24位 - 底部有序RGB565-16bit):
查看全分辨率图像
正如您所提到的,Floyd-Steinberg抖动方法很受欢迎,因为它简单而快速.对于24位和16位颜色之间的细微差别,结果在视觉上几乎是最佳的.
有人建议我使用Lena样本图片,但我决定反对; 尽管它作为测试图像有着悠久的历史,但我认为它对现代感觉来说太过性别歧视了.相反,我提出了我自己的照片.首先是原始,然后转换为抖动RGB565(并转换回24位显示).

和代码,在C++中:
inline BYTE Clamp(int n)
{
n = n>255 ? 255 : n;
return n<0 ? 0 : n;
}
struct RGBTriplet
{
int r;
int g;
int b;
RGBTriplet(int _r = 0, int _g = 0, int _b = 0) : r(_r), g(_g), b(_b) {};
};
void RGB565Dithered(const BYTE * pIn, int width, int height, int strideIn, BYTE * pOut, int strideOut)
{
std::vector<RGBTriplet> oldErrors(width + 2);
for (int y = 0; y < height; ++y)
{
std::vector<RGBTriplet> newErrors(width + 2);
RGBTriplet errorAhead;
for (int x = 0; x < width; ++x)
{
int b = (int)(unsigned int)pIn[3*x] + (errorAhead.b + oldErrors[x+1].b) / 16;
int g = (int)(unsigned int)pIn[3*x + 1] + (errorAhead.g + oldErrors[x+1].g) / 16;
int r = (int)(unsigned int)pIn[3*x + 2] + (errorAhead.r + oldErrors[x+1].r) / 16;
int bAfter = Clamp(b) >> 3;
int gAfter = Clamp(g) >> 2;
int rAfter = Clamp(r) >> 3;
int pixel16 = (rAfter << 11) | (gAfter << 5) | bAfter;
pOut[2*x] = (BYTE) pixel16;
pOut[2*x + 1] = (BYTE) (pixel16 >> 8);
int error = r - ((rAfter * 255) / 31);
errorAhead.r = error * 7;
newErrors[x].r += error * 3;
newErrors[x+1].r += error * 5;
newErrors[x+2].r = error * 1;
error = g - ((gAfter * 255) / 63);
errorAhead.g = error * 7;
newErrors[x].g += error * 3;
newErrors[x+1].g += error * 5;
newErrors[x+2].g = error * 1;
error = b - ((bAfter * 255) / 31);
errorAhead.b = error * 7;
newErrors[x].b += error * 3;
newErrors[x+1].b += error * 5;
newErrors[x+2].b = error * 1;
}
pIn += strideIn;
pOut += strideOut;
oldErrors.swap(newErrors);
}
}
Run Code Online (Sandbox Code Playgroud)
我不保证这段代码是完美的,我已经不得不修复我在另一条评论中提到的那些微妙错误之一.但它确实产生了上述结果.它采用Windows使用的BGR顺序的24位像素,并以小端顺序生成R5G6B5 16位像素.