squawk/ui/utils/exponentialblur.cpp

580 lines
23 KiB
C++

/*
* Squawk messenger.
* Copyright (C) 2019 Yury Gubich <blue@macaw.me>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "exponentialblur.h"
// Edge length, in pixels, of the square tiles the tiled rotation routines below work on.
static const int tileSize = 32;
template <class T>
static
inline void qt_memrotate90_tiled(const T *src, int w, int h, int sstride, T *dest, int dstride)
{
sstride /= sizeof(T);
dstride /= sizeof(T);
const int pack = sizeof(quint32) / sizeof(T);
const int unaligned =
qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(T)), uint(h));
const int restX = w % tileSize;
const int restY = (h - unaligned) % tileSize;
const int unoptimizedY = restY % pack;
const int numTilesX = w / tileSize + (restX > 0);
const int numTilesY = (h - unaligned) / tileSize + (restY >= pack);
for (int tx = 0; tx < numTilesX; ++tx) {
const int startx = w - tx * tileSize - 1;
const int stopx = qMax(startx - tileSize, 0);
if (unaligned) {
for (int x = startx; x >= stopx; --x) {
T *d = dest + (w - x - 1) * dstride;
for (int y = 0; y < unaligned; ++y) {
*d++ = src[y * sstride + x];
}
}
}
for (int ty = 0; ty < numTilesY; ++ty) {
const int starty = ty * tileSize + unaligned;
const int stopy = qMin(starty + tileSize, h - unoptimizedY);
for (int x = startx; x >= stopx; --x) {
quint32 *d = reinterpret_cast<quint32*>(dest + (w - x - 1) * dstride + starty);
for (int y = starty; y < stopy; y += pack) {
quint32 c = src[y * sstride + x];
for (int i = 1; i < pack; ++i) {
const int shift = (sizeof(T) * 8 * i);
const T color = src[(y + i) * sstride + x];
c |= color << shift;
}
*d++ = c;
}
}
}
if (unoptimizedY) {
const int starty = h - unoptimizedY;
for (int x = startx; x >= stopx; --x) {
T *d = dest + (w - x - 1) * dstride + starty;
for (int y = starty; y < h; ++y) {
*d++ = src[y * sstride + x];
}
}
}
}
}
// Tiled rotation that copies one T pixel at a time.
//
// Source pixel (x, y) is written to destination row (w - 1 - x), column y.
// sstride and dstride are byte strides and are applied through char pointers.
template <class T>
static
inline void qt_memrotate90_tiled_unpacked(const T *src, int w, int h, int sstride, T *dest,
                                          int dstride)
{
    const int numTilesX = (w + tileSize - 1) / tileSize;
    const int numTilesY = (h + tileSize - 1) / tileSize;

    for (int tx = 0; tx < numTilesX; ++tx) {
        const int startx = w - tx * tileSize - 1;
        // Inclusive lower bound: a tile spans exactly tileSize columns, so the
        // column where the next tile starts is not copied twice.
        const int stopx = qMax(startx - tileSize + 1, 0);

        for (int ty = 0; ty < numTilesY; ++ty) {
            const int starty = ty * tileSize;
            const int stopy = qMin(starty + tileSize, h);

            for (int x = startx; x >= stopx; --x) {
                T *d = reinterpret_cast<T *>(reinterpret_cast<char *>(dest) + (w - x - 1) * dstride)
                       + starty;
                const char *s = reinterpret_cast<const char *>(src + x) + starty * sstride;
                // Walk down source column x while filling the destination row.
                for (int y = starty; y < stopy; ++y) {
                    *d++ = *reinterpret_cast<const T *>(s);
                    s += sstride;
                }
            }
        }
    }
}
template <class T>
static
inline void qt_memrotate270_tiled(const T *src, int w, int h, int sstride, T *dest, int dstride)
{
sstride /= sizeof(T);
dstride /= sizeof(T);
const int pack = sizeof(quint32) / sizeof(T);
const int unaligned =
qMin(uint((quintptr(dest) & (sizeof(quint32)-1)) / sizeof(T)), uint(h));
const int restX = w % tileSize;
const int restY = (h - unaligned) % tileSize;
const int unoptimizedY = restY % pack;
const int numTilesX = w / tileSize + (restX > 0);
const int numTilesY = (h - unaligned) / tileSize + (restY >= pack);
for (int tx = 0; tx < numTilesX; ++tx) {
const int startx = tx * tileSize;
const int stopx = qMin(startx + tileSize, w);
if (unaligned) {
for (int x = startx; x < stopx; ++x) {
T *d = dest + x * dstride;
for (int y = h - 1; y >= h - unaligned; --y) {
*d++ = src[y * sstride + x];
}
}
}
for (int ty = 0; ty < numTilesY; ++ty) {
const int starty = h - 1 - unaligned - ty * tileSize;
const int stopy = qMax(starty - tileSize, unoptimizedY);
for (int x = startx; x < stopx; ++x) {
quint32 *d = reinterpret_cast<quint32*>(dest + x * dstride
+ h - 1 - starty);
for (int y = starty; y >= stopy; y -= pack) {
quint32 c = src[y * sstride + x];
for (int i = 1; i < pack; ++i) {
const int shift = (sizeof(T) * 8 * i);
const T color = src[(y - i) * sstride + x];
c |= color << shift;
}
*d++ = c;
}
}
}
if (unoptimizedY) {
const int starty = unoptimizedY - 1;
for (int x = startx; x < stopx; ++x) {
T *d = dest + x * dstride + h - 1 - starty;
for (int y = starty; y >= 0; --y) {
*d++ = src[y * sstride + x];
}
}
}
}
}
// Tiled rotation that copies one T pixel at a time.
//
// Source pixel (x, y) is written to destination row x, column (h - 1 - y).
// sstride and dstride are byte strides and are applied through char pointers.
template <class T>
static
inline void qt_memrotate270_tiled_unpacked(const T *src, int w, int h, int sstride, T *dest,
                                           int dstride)
{
    const int numTilesX = (w + tileSize - 1) / tileSize;
    const int numTilesY = (h + tileSize - 1) / tileSize;

    for (int tx = 0; tx < numTilesX; ++tx) {
        const int startx = tx * tileSize;
        const int stopx = qMin(startx + tileSize, w);

        for (int ty = 0; ty < numTilesY; ++ty) {
            const int starty = h - 1 - ty * tileSize;
            // Inclusive lower bound: a tile spans exactly tileSize source rows,
            // so the row where the next tile starts is not copied twice.
            const int stopy = qMax(starty - tileSize + 1, 0);

            for (int x = startx; x < stopx; ++x) {
                T *d = reinterpret_cast<T *>(reinterpret_cast<char *>(dest) + x * dstride)
                       + h - 1 - starty;
                const char *s = reinterpret_cast<const char *>(src + x) + starty * sstride;
                // Walk up source column x while filling the destination row.
                for (int y = starty; y >= stopy; --y) {
                    *d++ = *reinterpret_cast<const T *>(s);
                    s -= sstride;
                }
            }
        }
    }
}
// Picks the rotation routine for pixel type T: the packed variant on
// little-endian builds when T divides a quint32 evenly, the plain per-pixel
// variant in every other case.
template <class T>
static
inline void qt_memrotate90_template(const T *src, int srcWidth, int srcHeight, int srcStride,
                                    T *dest, int dstStride)
{
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
    // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer
    const bool fitsInWord = (sizeof(quint32) % sizeof(T) == 0);
    if (fitsInWord) {
        qt_memrotate90_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
        return;
    }
#endif
    qt_memrotate90_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
}
// 32-bit pixels already fill a whole word, so packing gains nothing:
// always use the per-pixel routine.
template <>
inline void qt_memrotate90_template<quint32>(const quint32 *src, int w, int h, int sstride, quint32 *dest, int dstride)
{
    qt_memrotate90_tiled_unpacked<quint32>(src, w, h, sstride, dest, dstride);
}

// 64-bit pixels go through the per-pixel routine as well.
template <>
inline void qt_memrotate90_template<quint64>(const quint64 *src, int w, int h, int sstride, quint64 *dest, int dstride)
{
    qt_memrotate90_tiled_unpacked<quint64>(src, w, h, sstride, dest, dstride);
}
// Rotates a w x h pixel block by 180 degrees: destination row dy is source
// row (h - 1 - dy) with its pixels in reverse order.
// sstride and dstride are byte strides.
template <class T>
static
inline void qt_memrotate180_template(const T *src, int w, int h, int sstride, T *dest, int dstride)
{
    // Start at the last source row and step backwards one row per output row.
    const char *srcRow = reinterpret_cast<const char *>(src) + (h - 1) * sstride;

    for (int row = 0; row < h; ++row) {
        const T *in = reinterpret_cast<const T *>(srcRow);
        T *out = reinterpret_cast<T *>(reinterpret_cast<char *>(dest) + row * dstride);

        for (int col = 0, mirrored = w - 1; col < w; ++col, --mirrored)
            out[col] = in[mirrored];

        srcRow -= sstride;
    }
}
// Picks the rotation routine for pixel type T: the packed variant on
// little-endian builds when T divides a quint32 evenly, the plain per-pixel
// variant in every other case.
template <class T>
static
inline void qt_memrotate270_template(const T *src, int srcWidth, int srcHeight, int srcStride,
                                     T *dest, int dstStride)
{
#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
    // packed algorithm assumes little endian and that sizeof(quint32)/sizeof(T) is an integer
    const bool fitsInWord = (sizeof(quint32) % sizeof(T) == 0);
    if (fitsInWord) {
        qt_memrotate270_tiled<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
        return;
    }
#endif
    qt_memrotate270_tiled_unpacked<T>(src, srcWidth, srcHeight, srcStride, dest, dstStride);
}
// 32-bit pixels already fill a whole word, so packing gains nothing:
// always use the per-pixel routine.
template <>
inline void qt_memrotate270_template<quint32>(const quint32 *src, int w, int h, int sstride, quint32 *dest, int dstride)
{
    qt_memrotate270_tiled_unpacked<quint32>(src, w, h, sstride, dest, dstride);
}

// 64-bit pixels go through the per-pixel routine as well.
template <>
inline void qt_memrotate270_template<quint64>(const quint64 *src, int w, int h, int sstride, quint64 *dest, int dstride)
{
    qt_memrotate270_tiled_unpacked<quint64>(src, w, h, sstride, dest, dstride);
}
// Defines the qt_memrotate90 / qt_memrotate180 / qt_memrotate270 overloads for
// pixel type `type`. Each overload forwards its arguments unchanged to the
// matching qt_memrotateNNN_template function.
#define QT_IMPL_MEMROTATE(type) \
void qt_memrotate90(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
{ \
qt_memrotate90_template(src, w, h, sstride, dest, dstride); \
} \
void qt_memrotate180(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
{ \
qt_memrotate180_template(src, w, h, sstride, dest, dstride); \
} \
void qt_memrotate270(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
{ \
qt_memrotate270_template(src, w, h, sstride, dest, dstride); \
}
// Variant of QT_IMPL_MEMROTATE whose 90 and 270 degree overloads call the
// per-pixel *_tiled_unpacked routines directly instead of going through the
// *_template dispatchers; the 180 degree overload is the same.
// NOTE(review): not invoked anywhere in the visible part of this file.
#define QT_IMPL_SIMPLE_MEMROTATE(type) \
void qt_memrotate90(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
{ \
qt_memrotate90_tiled_unpacked(src, w, h, sstride, dest, dstride); \
} \
void qt_memrotate180(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
{ \
qt_memrotate180_template(src, w, h, sstride, dest, dstride); \
} \
void qt_memrotate270(const type *src, int w, int h, int sstride, \
type *dest, int dstride) \
{ \
qt_memrotate270_tiled_unpacked(src, w, h, sstride, dest, dstride); \
}
// Emit the rotation overloads for 64-, 32-, 16- and 8-bit pixel types.
QT_IMPL_MEMROTATE(quint64)
QT_IMPL_MEMROTATE(quint32)
QT_IMPL_MEMROTATE(quint16)
QT_IMPL_MEMROTATE(quint8)
void qt_memrotate90_8(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate90(srcPixels, w, h, sbpl, destPixels, dbpl);
}
void qt_memrotate180_8(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate180(srcPixels, w, h, sbpl, destPixels, dbpl);
}
void qt_memrotate270_8(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate270(srcPixels, w, h, sbpl, destPixels, dbpl);
}
void qt_memrotate90_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate90((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl);
}
void qt_memrotate180_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate180((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl);
}
void qt_memrotate270_16(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate270((const ushort *)srcPixels, w, h, sbpl, (ushort *)destPixels, dbpl);
}
void qt_memrotate90_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate90((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl);
}
void qt_memrotate180_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate180((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl);
}
void qt_memrotate270_32(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate270((const uint *)srcPixels, w, h, sbpl, (uint *)destPixels, dbpl);
}
void qt_memrotate90_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate90((const quint64 *)srcPixels, w, h, sbpl, (quint64 *)destPixels, dbpl);
}
void qt_memrotate180_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate180((const quint64 *)srcPixels, w, h, sbpl, (quint64 *)destPixels, dbpl);
}
void qt_memrotate270_64(const uchar *srcPixels, int w, int h, int sbpl, uchar *destPixels, int dbpl)
{
qt_memrotate270((const quint64 *)srcPixels, w, h, sbpl, (quint64 *)destPixels, dbpl);
}
// Averages two values lane by lane without unpacking them: the bits that differ
// are halved and added to the bits both share. The mask clears the lowest bit
// of every 8-bit lane first, so the shift cannot leak into the lane below.
// Both arguments are evaluated more than once.
#define AVG(a,b) ( ((((a)^(b)) & 0xfefefefeUL) >> 1) + ((a)&(b)) )
// Same averaging for a 16-bit value whose mask clears bits 0, 5 and 11, i.e.
// the lowest bit of 5-, 6- and 5-bit fields.
#define AVG16(a,b) ( ((((a)^(b)) & 0xf7deUL) >> 1) + ((a)&(b)) )
// Byte offset added to a scanline pointer to reach the alpha byte of a 32-bit
// pixel: 0 on big-endian hosts, 3 otherwise.
const int alphaIndex = (QSysInfo::ByteOrder == QSysInfo::BigEndian ? 0 : 3);
// Returns a copy of source scaled to half its width and height, where every
// output pixel is the average of a 2x2 block of input pixels.
//
// Returns a null QImage when source is narrower or shorter than two pixels.
// Indexed8/Grayscale8 and ARGB8565_Premultiplied images keep their format;
// ARGB32_Premultiplied and RGB32 are averaged as they are; any other format is
// converted to ARGB32_Premultiplied first. The result carries the device pixel
// ratio of source.
QImage qt_halfScaled(const QImage &source)
{
    // A half-size result needs at least one complete 2x2 block.
    if (source.width() < 2 || source.height() < 2)
        return QImage();

    const QImage::Format format = source.format();

    if (format == QImage::Format_Indexed8 || format == QImage::Format_Grayscale8) {
        // assumes grayscale
        QImage half(source.width() / 2, source.height() / 2, format);
        half.setDevicePixelRatio(source.devicePixelRatioF());

        const uchar *upper = source.constBits();
        const qsizetype srcStride = source.bytesPerLine();
        uchar *outLine = half.bits();
        const qsizetype dstStride = half.bytesPerLine();
        const int outWidth = half.width();
        const int outHeight = half.height();

        for (int row = 0; row < outHeight; ++row) {
            const uchar *lower = upper + srcStride;
            for (int col = 0; col < outWidth; ++col) {
                const int left = 2 * col;
                const int sum = int(upper[left]) + int(upper[left + 1])
                              + int(lower[left]) + int(lower[left + 1]);
                // + 2 rounds to nearest before dividing by four
                outLine[col] = (sum + 2) >> 2;
            }
            outLine += dstStride;
            upper += srcStride << 1;
        }
        return half;
    }

    if (format == QImage::Format_ARGB8565_Premultiplied) {
        QImage half(source.width() / 2, source.height() / 2, format);
        half.setDevicePixelRatio(source.devicePixelRatioF());

        const uchar *upper = source.constBits();
        const qsizetype srcStride = source.bytesPerLine();
        uchar *outLine = half.bits();
        const qsizetype dstStride = half.bytesPerLine();
        const int outWidth = half.width();
        const int outHeight = half.height();

        for (int row = 0; row < outHeight; ++row) {
            const uchar *lower = upper + srcStride;
            for (int col = 0; col < outWidth; ++col) {
                // three bytes per pixel: alpha, then the 16-bit colour, low byte first
                const uchar *top = upper + 6 * col;
                const uchar *bottom = lower + 6 * col;
                uchar *out = outLine + 3 * col;

                // alpha
                out[0] = AVG(AVG(top[0], top[3]), AVG(bottom[0], bottom[3]));

                // rgb
                const quint16 topLeft = (top[2] << 8) | top[1];
                const quint16 topRight = (top[5] << 8) | top[4];
                const quint16 bottomLeft = (bottom[2] << 8) | bottom[1];
                const quint16 bottomRight = (bottom[5] << 8) | bottom[4];
                const quint16 colour = AVG16(AVG16(topLeft, topRight), AVG16(bottomLeft, bottomRight));
                out[1] = colour & 0xff;
                out[2] = colour >> 8;
            }
            outLine += dstStride;
            upper += srcStride << 1;
        }
        return half;
    }

    // Everything else is averaged as 32-bit pixels.
    QImage working = source;
    if (format != QImage::Format_ARGB32_Premultiplied && format != QImage::Format_RGB32)
        working = source.convertToFormat(QImage::Format_ARGB32_Premultiplied);

    QImage half(source.width() / 2, source.height() / 2, working.format());
    half.setDevicePixelRatio(source.devicePixelRatioF());

    // strides below are counted in 32-bit pixels
    const quint32 *upper = reinterpret_cast<const quint32 *>(working.constBits());
    const qsizetype srcStride = working.bytesPerLine() >> 2;
    quint32 *outLine = reinterpret_cast<quint32 *>(half.bits());
    const qsizetype dstStride = half.bytesPerLine() >> 2;
    const int outWidth = half.width();
    const int outHeight = half.height();

    for (int row = 0; row < outHeight; ++row) {
        const quint32 *lower = upper + srcStride;
        for (int col = 0; col < outWidth; ++col) {
            const int left = 2 * col;
            outLine[col] = AVG(AVG(upper[left], upper[left + 1]), AVG(lower[left], lower[left + 1]));
        }
        outLine += dstStride;
        upper += srcStride << 1;
    }
    return half;
}
// Shifts value by a compile-time amount: left for a positive shift, right for
// a negative one, unchanged for zero. Only the low five bits of the shift
// magnitude are used.
template <int shift>
inline int qt_static_shift(int value)
{
    if (shift > 0)
        return value << (uint(shift) & 0x1f);
    if (shift < 0)
        return value >> (uint(-shift) & 0x1f);
    return value;
}
// One step of the blur for a 32-bit pixel at bptr.
//
// zR, zG, zB and zA are running per-channel accumulators carried from pixel
// to pixel. Each channel of the pixel is scaled up by zprec bits, the
// accumulator is moved towards it by alpha times their difference, and the
// pixel is rewritten from the accumulators scaled back down by
// (zprec + aprec) bits.
template<int aprec, int zprec>
inline void qt_blurinner(uchar *bptr, int &zR, int &zG, int &zB, int &zA, int alpha)
{
    QRgb *const px = reinterpret_cast<QRgb *>(bptr);
    const int argb = *px;

    // Move each 8-bit channel to bit position zprec and isolate it.
    const int channelMask = 0xff << zprec;
    const int alphaIn = qt_static_shift<zprec - 24>(argb) & channelMask;
    const int redIn = qt_static_shift<zprec - 16>(argb) & channelMask;
    const int greenIn = qt_static_shift<zprec - 8>(argb) & channelMask;
    const int blueIn = qt_static_shift<zprec>(argb) & channelMask;

    // Accumulator values brought down to the same scale as the inputs.
    const int redNow = zR >> aprec;
    const int greenNow = zG >> aprec;
    const int blueNow = zB >> aprec;
    const int alphaNow = zA >> aprec;

    zR += alpha * (redIn - redNow);
    zG += alpha * (greenIn - greenNow);
    zB += alpha * (blueIn - blueNow);
    zA += alpha * (alphaIn - alphaNow);

    // Take the top eight bits of every accumulator and reassemble the pixel.
    const int accumulatorMask = 0xff << (zprec + aprec);
    *px = qt_static_shift<24 - zprec - aprec>(zA & accumulatorMask)
        | qt_static_shift<16 - zprec - aprec>(zR & accumulatorMask)
        | qt_static_shift<8 - zprec - aprec>(zG & accumulatorMask)
        | qt_static_shift<-zprec - aprec>(zB & accumulatorMask);
}
// One step of the blur for the single byte at bptr.
//
// z is the running accumulator carried from pixel to pixel: the byte is
// scaled up by zprec bits, z is moved towards it by alpha times their
// difference, and the byte is rewritten from z scaled back down by
// (zprec + aprec) bits.
template<int aprec, int zprec>
inline void qt_blurinner_alphaOnly(uchar *bptr, int &z, int alpha)
{
    const int sample = int(*bptr) << zprec;
    const int current = z >> aprec;
    z += alpha * (sample - current);
    *bptr = z >> (zprec + aprec);
}
// Blurs one scanline of im in place by running the per-pixel kernel from the
// first pixel to the last and then back again, carrying the accumulators
// across both sweeps.
//
// line  - index of the scanline to process.
// alpha - blend weight passed to the per-pixel kernel.
//
// With alphaOnly set, only one byte per pixel is touched: the alpha byte of
// multi-byte pixels, or the only byte of single-channel 8-bit formats.
template<int aprec, int zprec, bool alphaOnly>
inline void qt_blurrow(QImage & im, int line, int alpha)
{
    uchar *bptr = im.scanLine(line);

    int zR = 0, zG = 0, zB = 0, zA = 0;

    // Skip ahead to the alpha byte, but only when pixels are wider than one
    // byte: Indexed8 and Grayscale8 store a single byte per pixel, so adding
    // the offset there would shift the whole row off its data.
    const QImage::Format format = im.format();
    if (alphaOnly && format != QImage::Format_Indexed8 && format != QImage::Format_Grayscale8)
        bptr += alphaIndex;

    // bytes per pixel
    const int stride = im.depth() >> 3;
    const int im_width = im.width();

    // forward sweep
    for (int index = 0; index < im_width; ++index) {
        if (alphaOnly)
            qt_blurinner_alphaOnly<aprec, zprec>(bptr, zA, alpha);
        else
            qt_blurinner<aprec, zprec>(bptr, zR, zG, zB, zA, alpha);
        bptr += stride;
    }

    // step back onto the last pixel, which the backward sweep does not revisit
    bptr -= stride;

    // backward sweep
    for (int index = im_width - 2; index >= 0; --index) {
        bptr -= stride;
        if (alphaOnly)
            qt_blurinner_alphaOnly<aprec, zprec>(bptr, zA, alpha);
        else
            qt_blurinner<aprec, zprec>(bptr, zR, zG, zB, zA, alpha);
    }
}
// Blurs img in place: every row is blurred, the image is rotated into a
// temporary, every row of the temporary is blurred (which covers the columns
// of the original), and the result is rotated back or kept rotated.
//
// radius          - blur radius; a radius of 1e-5 or less selects the largest
//                   blend weight instead of evaluating the formula.
// improvedQuality - blur every row twice, with the radius halved.
// transposed      - zero or positive: the intermediate rotation uses
//                   qt_memrotate270, negative: qt_memrotate90. Zero rotates
//                   the result back with qt_memrotate90; any other value
//                   leaves img as the rotated intermediate.
//
// img must be ARGB32_Premultiplied, RGB32, Indexed8 or Grayscale8 (asserted).
template <int aprec, int zprec, bool alphaOnly>
void expblur(QImage &img, qreal radius, bool improvedQuality = false, int transposed = 0)
{
    // halve the radius if we're using two passes
    if (improvedQuality)
        radius *= qreal(0.5);

    Q_ASSERT(img.format() == QImage::Format_ARGB32_Premultiplied
             || img.format() == QImage::Format_RGB32
             || img.format() == QImage::Format_Indexed8
             || img.format() == QImage::Format_Grayscale8);

    // choose the alpha such that pixels at radius distance from a fully
    // saturated pixel will have an alpha component of no greater than
    // the cutOffIntensity
    const qreal cutOffIntensity = 2;
    int alpha = radius <= qreal(1e-5)
        ? ((1 << aprec)-1)
        : qRound((1<<aprec)*(1 - qPow(cutOffIntensity * (1 / qreal(255)), 1 / radius)));

    int img_height = img.height();
    for (int row = 0; row < img_height; ++row) {
        for (int i = 0; i <= int(improvedQuality); ++i)
            qt_blurrow<aprec, zprec, alphaOnly>(img, row, alpha);
    }

    // The rotated copy has width and height swapped. Allocating it with the
    // original geometry makes the rotation write rows of img.height() pixels
    // into rows sized for img.width() pixels whenever the image is not square.
    QImage temp(img.height(), img.width(), img.format());
    temp.setDevicePixelRatio(img.devicePixelRatioF());
    if (transposed >= 0) {
        if (img.depth() == 8) {
            qt_memrotate270(reinterpret_cast<const quint8*>(img.bits()),
                            img.width(), img.height(), img.bytesPerLine(),
                            reinterpret_cast<quint8*>(temp.bits()),
                            temp.bytesPerLine());
        } else {
            qt_memrotate270(reinterpret_cast<const quint32*>(img.bits()),
                            img.width(), img.height(), img.bytesPerLine(),
                            reinterpret_cast<quint32*>(temp.bits()),
                            temp.bytesPerLine());
        }
    } else {
        if (img.depth() == 8) {
            qt_memrotate90(reinterpret_cast<const quint8*>(img.bits()),
                           img.width(), img.height(), img.bytesPerLine(),
                           reinterpret_cast<quint8*>(temp.bits()),
                           temp.bytesPerLine());
        } else {
            qt_memrotate90(reinterpret_cast<const quint32*>(img.bits()),
                           img.width(), img.height(), img.bytesPerLine(),
                           reinterpret_cast<quint32*>(temp.bits()),
                           temp.bytesPerLine());
        }
    }

    // second pass: rows of the rotated image are columns of the original
    img_height = temp.height();
    for (int row = 0; row < img_height; ++row) {
        for (int i = 0; i <= int(improvedQuality); ++i)
            qt_blurrow<aprec, zprec, alphaOnly>(temp, row, alpha);
    }

    if (transposed == 0) {
        // rotate back into img, which still has the original geometry
        if (img.depth() == 8) {
            qt_memrotate90(reinterpret_cast<const quint8*>(temp.bits()),
                           temp.width(), temp.height(), temp.bytesPerLine(),
                           reinterpret_cast<quint8*>(img.bits()),
                           img.bytesPerLine());
        } else {
            qt_memrotate90(reinterpret_cast<const quint32*>(temp.bits()),
                           temp.width(), temp.height(), temp.bytesPerLine(),
                           reinterpret_cast<quint32*>(img.bits()),
                           img.bytesPerLine());
        }
    } else {
        // caller asked for the rotated result
        img = temp;
    }
}
// Blurs img in place with the exponential blur, using 12 bits of blend-weight
// precision and 10 bits of accumulator precision.
//
// radius, improvedQuality and transposed are forwarded unchanged to expblur.
//
// Single-channel 8-bit images (Indexed8, Grayscale8) are sent through the
// one-byte-per-pixel kernel. The four-channel kernel reads and writes a whole
// 32-bit word per pixel, which on one-byte pixels would touch neighbouring
// pixels and run past the end of each scanline.
void Utils::exponentialblur(QImage& img, qreal radius, bool improvedQuality, int transposed)
{
    const QImage::Format format = img.format();
    if (format == QImage::Format_Indexed8 || format == QImage::Format_Grayscale8)
        expblur<12, 10, true>(img, radius, improvedQuality, transposed);
    else
        expblur<12, 10, false>(img, radius, improvedQuality, transposed);
}