19 #ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
20 #define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
22 #if defined(__cplusplus) || defined(c_plusplus)
26 #if defined(MAGICKCORE_OPENCL_SUPPORT)
/*
  Helper macros that turn their arguments into string-literal fragments.
  Each OPENCL_* macro expands to a string that starts with a newline, holds
  one preprocessor directive, and ends with a newline; the directive name is
  spelled as two adjacent literals ("\n #" followed by e.g. "define ") which
  the compiler concatenates.
*/
/* "#define VAR <rest>" line; VAR and the remaining arguments are stringized. */
31 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
/* "#elif <condition>" line; the condition is stringized. */
32 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
/* "#else" line; takes no arguments. */
33 #define OPENCL_ELSE() "\n #""else " " \n"
/* "#endif" line; takes no arguments. */
34 #define OPENCL_ENDIF() "\n #""endif " " \n"
/* "#if <condition>" line; the condition is stringized. */
35 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
/* Stringize arbitrary source text and append a newline. */
36 #define STRINGIFY(...) #__VA_ARGS__ "\n"
38 const char *accelerateKernels =
/*
  Contributes a "#define MagickMax(x,y) ..." line to the generated string
  (this invocation appears to sit inside the accelerateKernels initializer).
  The generated macro yields the larger of x and y; each argument appears
  twice in the expansion, so arguments with side effects are evaluated more
  than once.
*/
51 OPENCL_DEFINE(
MagickMax(x,y), (((x) > (y)) ? (x) : (y)))
/*
  Contributes a "#define MagickMin(x,y) ..." line to the generated string
  (this invocation appears to sit inside the accelerateKernels initializer).
  The generated macro yields the smaller of x and y; each argument appears
  twice in the expansion, so arguments with side effects are evaluated more
  than once.
*/
52 OPENCL_DEFINE(
MagickMin(x,y), (((x) < (y)) ? (x) : (y)))
280 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
282 return((CLQuantum) value);
289 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
291 return((CLQuantum) (257.0f*value));
298 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
300 return((CLQuantum) (16843009.0*value));
306 OPENCL_IF((MAGICKCORE_HDRI_SUPPORT == 1))
320 return (CLQuantum) (clamp(value, 0.0f,
QuantumRange) + 0.5f);
327 static inline
int ClampToCanvas(const
int offset,const
int range)
329 return clamp(offset, (
int)0, range-1);
334 static inline uint ScaleQuantumToMap(CLQuantum value)
336 if (value >= (CLQuantum)
MaxMap)
337 return ((uint)MaxMap);
339 return ((uint)value);
346 float sign = x < (float) 0.0 ? (
float) -1.0 : (float) 1.0;
354 return clamp(value,0.0f,1.0f);
360 static inline unsigned int getPixelIndex(
const unsigned int number_channels,
361 const unsigned int columns,
const unsigned int x,
const unsigned int y)
363 return (x * number_channels) + (y * columns * number_channels);
366 static inline float getPixelRed(
const __global CLQuantum *p) {
return (
float)*p; }
367 static inline float getPixelGreen(
const __global CLQuantum *p) {
return (
float)*(p+1); }
368 static inline float getPixelBlue(
const __global CLQuantum *p) {
return (
float)*(p+2); }
369 static inline float getPixelAlpha(
const __global CLQuantum *p,
const unsigned int number_channels) {
return (
float)*(p+number_channels-1); }
371 static inline void setPixelRed(__global CLQuantum *p,
const CLQuantum value) { *p=value; }
372 static inline void setPixelGreen(__global CLQuantum *p,
const CLQuantum value) { *(p+1)=value; }
373 static inline void setPixelBlue(__global CLQuantum *p,
const CLQuantum value) { *(p+2)=value; }
374 static inline void setPixelAlpha(__global CLQuantum *p,
const unsigned int number_channels,
const CLQuantum value) { *(p+number_channels-1)=value; }
376 static inline CLQuantum getBlue(CLPixelType p) {
return p.x; }
377 static inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
378 static inline float getBlueF4(float4 p) {
return p.x; }
379 static inline void setBlueF4(float4* p,
float value) { (*p).x = value; }
381 static inline CLQuantum getGreen(CLPixelType p) {
return p.y; }
382 static inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
383 static inline float getGreenF4(float4 p) {
return p.y; }
384 static inline void setGreenF4(float4* p,
float value) { (*p).y = value; }
386 static inline CLQuantum getRed(CLPixelType p) {
return p.z; }
387 static inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
388 static inline float getRedF4(float4 p) {
return p.z; }
389 static inline void setRedF4(float4* p,
float value) { (*p).z = value; }
391 static inline CLQuantum getAlpha(CLPixelType p) {
return p.w; }
392 static inline void setAlpha(CLPixelType* p, CLQuantum value) { (*p).w = value; }
393 static inline float getAlphaF4(float4 p) {
return p.w; }
394 static inline void setAlphaF4(float4* p,
float value) { (*p).w = value; }
396 static inline void ReadChannels(
const __global CLQuantum *p,
const unsigned int number_channels,
397 const ChannelType channel,
float *red,
float *green,
float *blue,
float *alpha)
402 if (number_channels > 2)
405 *green=getPixelGreen(p);
408 *blue=getPixelBlue(p);
411 if (((number_channels == 4) || (number_channels == 2)) &&
413 *alpha=getPixelAlpha(p,number_channels);
416 static inline float4 ReadAllChannels(
const __global CLQuantum *image,
const unsigned int number_channels,
417 const unsigned int columns,
const unsigned int x,
const unsigned int y)
419 const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
423 pixel.x=getPixelRed(p);
425 if (number_channels > 2)
427 pixel.y=getPixelGreen(p);
428 pixel.z=getPixelBlue(p);
431 if ((number_channels == 4) || (number_channels == 2))
432 pixel.w=getPixelAlpha(p,number_channels);
436 static inline float4 ReadFloat4(
const __global CLQuantum *image,
const unsigned int number_channels,
437 const unsigned int columns,
const unsigned int x,
const unsigned int y,
const ChannelType channel)
439 const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
446 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
447 return (float4)(red, green, blue, alpha);
450 static inline void WriteChannels(__global CLQuantum *p,
const unsigned int number_channels,
451 const ChannelType channel,
float red,
float green,
float blue,
float alpha)
453 if ((channel & RedChannel) != 0)
456 if (number_channels > 2)
458 if ((channel & GreenChannel) != 0)
465 if (((number_channels == 4) || (number_channels == 2)) &&
470 static inline void WriteAllChannels(__global CLQuantum *image,
const unsigned int number_channels,
471 const unsigned int columns,
const unsigned int x,
const unsigned int y, float4 pixel)
473 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
477 if (number_channels > 2)
483 if ((number_channels == 4) || (number_channels == 2))
487 static inline void WriteFloat4(__global CLQuantum *image,
const unsigned int number_channels,
488 const unsigned int columns,
const unsigned int x,
const unsigned int y,
const ChannelType channel,
491 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
492 WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w);
496 const unsigned int method,
float red,
float green,
float blue)
507 intensity=(red+green+blue)/3.0;
523 intensity=(float) (((
float) red*red+green*green+blue*blue)/
537 intensity=0.298839*red+0.586811*green+0.114350*blue;
550 intensity=0.298839*red+0.586811*green+0.114350*blue;
564 intensity=0.212656*red+0.715158*green+0.072186*blue;
577 intensity=0.212656*red+0.715158*green+0.072186*blue;
582 intensity=(float) (sqrt((
float) red*red+green*green+blue*blue)/
591 static inline int mirrorBottom(
int value)
593 return (value < 0) ? - (value) : value;
596 static inline int mirrorTop(
int value,
int width)
598 return (value >= width) ? (2 * width - value - 1) : value;
623 ulong MWC_AddMod64(ulong a, ulong b, ulong M)
627 if( (v>=M) || (convert_float(v) < convert_float(a)) )
638 ulong MWC_MulMod64(ulong a, ulong b, ulong M)
643 r=MWC_AddMod64(r,b,M);
644 b=MWC_AddMod64(b,b,M);
654 ulong MWC_PowMod64(ulong a, ulong e, ulong M)
659 acc=MWC_MulMod64(acc,sqr,M);
660 sqr=MWC_MulMod64(sqr,sqr,M);
666 uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
668 ulong m=MWC_PowMod64(A, distance, M);
669 ulong x=curr.x*(ulong)A+curr.y;
670 x=MWC_MulMod64(x, m, M);
671 return (uint2)((uint)(x/A), (uint)(x%A));
674 uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
681 enum{ MWC_BASEID = 4077358422479273989UL };
683 ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
684 ulong m=MWC_PowMod64(A, dist, M);
686 ulong x=MWC_MulMod64(MWC_BASEID, m, M);
687 return (uint2)((uint)(x/A), (uint)(x%A));
691 typedef struct{ uint x; uint c; uint seed0; ulong seed1; } mwc64x_state_t;
693 void MWC64X_Step(mwc64x_state_t *s)
697 uint Xn=s->seed0*X+C;
698 uint carry=(uint)(Xn<C);
699 uint Cn=mad_hi(s->seed0,X,carry);
705 void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
707 uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->seed0, s->seed1, distance);
712 void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
714 uint2 tmp=MWC_SeedImpl_Mod64(s->seed0, s->seed1, 1, 0, baseOffset, perStreamOffset);
720 uint MWC64X_NextUint(mwc64x_state_t *s)
722 uint res=s->x ^ s->c;
731 float mwcReadPseudoRandomValue(mwc64x_state_t* rng)
733 return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
736 float mwcGenerateDifferentialNoise(mwc64x_state_t* r,
float pixel,
NoiseType noise_type,
float attenuate)
745 alpha=mwcReadPseudoRandomValue(r);
762 beta=mwcReadPseudoRandomValue(r);
763 gamma=sqrt(-2.0f*log(alpha));
764 sigma=gamma*cospi((2.0f*beta));
765 tau=gamma*sinpi((2.0f*beta));
801 if (alpha > MagickEpsilon)
802 sigma=sqrt(-2.0f*log(alpha));
803 beta=mwcReadPseudoRandomValue(r);
805 cospi((2.0f*beta))/2.0f);
814 for (i=0; alpha > poisson; i++)
816 beta=mwcReadPseudoRandomValue(r);
832 void AddNoise(
const __global CLQuantum *image,
833 const unsigned int number_channels,
const ChannelType channel,
834 const unsigned int length,
const unsigned int pixelsPerWorkItem,
835 const NoiseType noise_type,
const float attenuate,
const unsigned int seed0,
836 const unsigned int seed1,
const unsigned int numRandomNumbersPerPixel,
837 __global CLQuantum *filteredImage)
843 uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
844 uint offset = span * get_local_size(0) * get_group_id(0);
845 MWC64X_SeedStreams(&rng, offset, span);
847 uint pos = get_group_id(0) * get_local_size(0) * pixelsPerWorkItem * number_channels + (get_local_id(0) * number_channels);
848 uint count = pixelsPerWorkItem;
850 while (count > 0 && pos < length)
852 const __global CLQuantum *p = image + pos;
853 __global CLQuantum *q = filteredImage + pos;
860 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
862 if ((channel & RedChannel) != 0)
863 red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate);
865 if (number_channels > 2)
867 if ((channel & GreenChannel) != 0)
868 green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate);
870 if ((channel & BlueChannel) != 0)
871 blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate);
874 if (((number_channels == 4) || (number_channels == 2)) &&
875 ((channel & AlphaChannel) != 0))
876 alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate);
878 WriteChannels(q, number_channels, channel, red, green, blue, alpha);
880 pos += (get_local_size(0) * number_channels);
902 __kernel
void BlurRow(
const __global CLQuantum *image,
903 const unsigned int number_channels,
const ChannelType channel,
904 __constant
float *filter,
const unsigned int width,
905 const unsigned int imageColumns,
const unsigned int imageRows,
906 __local float4 *temp,__global float4 *tempImage)
908 const int x = get_global_id(0);
909 const int y = get_global_id(1);
911 const int columns = imageColumns;
913 const unsigned int radius = (width-1)/2;
914 const int wsize = get_local_size(0);
915 const unsigned int loadSize = wsize+width;
918 const int groupX=get_local_size(0)*get_group_id(0);
921 for (
int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
923 int cx = ClampToCanvas(i + groupX - radius, columns);
924 temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel);
928 barrier(CLK_LOCAL_MEM_FENCE);
931 if (get_global_id(0) < columns)
934 float4 result = (float4) 0;
938 for ( ; i+7 < width; )
940 for (
int j=0; j < 8; j++)
941 result+=filter[i+j]*temp[i+j+get_local_id(0)];
945 for ( ; i < width; i++)
946 result+=filter[i]*temp[i+get_local_id(0)];
949 tempImage[y*columns+x] = result;
958 __kernel
void BlurColumn(
const __global float4 *blurRowData,
959 const unsigned int number_channels,
const ChannelType channel,
960 __constant
float *filter,
const unsigned int width,
961 const unsigned int imageColumns,
const unsigned int imageRows,
962 __local float4 *temp,__global CLQuantum *filteredImage)
964 const int x = get_global_id(0);
965 const int y = get_global_id(1);
967 const int columns = imageColumns;
968 const int rows = imageRows;
970 unsigned int radius = (width-1)/2;
971 const int wsize = get_local_size(1);
972 const unsigned int loadSize = wsize+width;
975 const int groupX=get_local_size(0)*get_group_id(0);
976 const int groupY=get_local_size(1)*get_group_id(1);
981 for (
int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
982 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
985 barrier(CLK_LOCAL_MEM_FENCE);
988 if (get_global_id(1) < rows)
991 float4 result = (float4) 0;
995 for ( ; i+7 < width; )
997 for (
int j=0; j < 8; j++)
998 result+=filter[i+j]*temp[i+j+get_local_id(1)];
1002 for ( ; i < width; i++)
1003 result+=filter[i]*temp[i+get_local_id(1)];
1006 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
1033 float delta=tmax-tmin;
1035 result.y=delta/tmax;
1039 result.x =((pixel.x == tmax) ? 0.0f : ((pixel.y == tmax) ? 2.0f : 4.0f));
1040 result.x+=((pixel.x == tmax) ? (pixel.y-pixel.z) : ((pixel.y == tmax) ?
1041 (pixel.z-pixel.x) : (pixel.x-pixel.y)))/delta;
1043 result.x+=(result.x < 0.0f) ? 0.0f : 1.0f;
1052 float saturation=pixel.y;
1053 float brightness=pixel.z;
1055 float4 result=pixel;
1057 if (saturation == 0.0f)
1063 float h=6.0f*(hue-floor(hue));
1065 float p=brightness*(1.0f-saturation);
1066 float q=brightness*(1.0f-saturation*f);
1067 float t=brightness*(1.0f-(saturation*(1.0f-f)));
1110 __kernel
void Contrast(__global CLQuantum *image,
1111 const unsigned int number_channels,
const int sign)
1113 const int x=get_global_id(0);
1114 const int y=get_global_id(1);
1115 const unsigned int columns=get_global_size(0);
1117 float4 pixel=ReadAllChannels(image,number_channels,columns,x,y);
1118 if (number_channels < 3)
1119 pixel.y=pixel.z=pixel.x;
1122 float brightness=pixel.z;
1123 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1124 brightness=clamp(brightness,0.0f,1.0f);
1128 WriteAllChannels(image,number_channels,columns,x,y,pixel);
1147 __kernel
void Histogram(__global CLPixelType * restrict im,
1149 const unsigned int colorspace,
1150 const unsigned int method,
1151 __global uint4 * restrict histogram)
1153 const int x = get_global_id(0);
1154 const int y = get_global_id(1);
1155 const int columns = get_global_size(0);
1156 const int c = x + y * columns;
1159 float red=(float)getRed(im[c]);
1160 float green=(float)getGreen(im[c]);
1161 float blue=(float)getBlue(im[c]);
1165 atomic_inc((__global uint *)(&(histogram[pos]))+2);
1178 __kernel
void ContrastStretch(__global CLPixelType * restrict im,
1180 __global CLPixelType * restrict stretch_map,
1181 const float4 white,
const float4 black)
1183 const int x = get_global_id(0);
1184 const int y = get_global_id(1);
1185 const int columns = get_global_size(0);
1186 const int c = x + y * columns;
1189 CLPixelType oValue, eValue;
1190 CLQuantum red, green, blue, alpha;
1195 if ((channel & RedChannel) != 0)
1197 if (getRedF4(white) != getRedF4(black))
1199 ePos = ScaleQuantumToMap(getRed(oValue));
1200 eValue = stretch_map[ePos];
1201 red = getRed(eValue);
1205 if ((channel & GreenChannel) != 0)
1207 if (getGreenF4(white) != getGreenF4(black))
1209 ePos = ScaleQuantumToMap(getGreen(oValue));
1210 eValue = stretch_map[ePos];
1211 green = getGreen(eValue);
1215 if ((channel & BlueChannel) != 0)
1217 if (getBlueF4(white) != getBlueF4(black))
1219 ePos = ScaleQuantumToMap(getBlue(oValue));
1220 eValue = stretch_map[ePos];
1221 blue = getBlue(eValue);
1225 if ((channel & AlphaChannel) != 0)
1227 if (getAlphaF4(white) != getAlphaF4(black))
1229 ePos = ScaleQuantumToMap(getAlpha(oValue));
1230 eValue = stretch_map[ePos];
1231 alpha = getAlpha(eValue);
1236 im[c]=(CLPixelType)(blue, green, red, alpha);
1255 void ConvolveOptimized(
const __global CLPixelType *input, __global CLPixelType *output,
1256 const unsigned int imageWidth,
const unsigned int imageHeight,
1257 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1258 const uint matte,
const ChannelType channel, __local CLPixelType *pixelLocalCache, __local
float* filterCache) {
1261 blockID.x = get_global_id(0) / get_local_size(0);
1262 blockID.y = get_global_id(1) / get_local_size(1);
1266 imageAreaOrg.x = blockID.x * get_local_size(0);
1267 imageAreaOrg.y = blockID.y * get_local_size(1);
1269 int2 midFilterDimen;
1270 midFilterDimen.x = (filterWidth-1)/2;
1271 midFilterDimen.y = (filterHeight-1)/2;
1273 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1276 int2 cachedAreaDimen;
1277 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1278 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
1281 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1282 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1283 int groupSize = get_local_size(0) * get_local_size(1);
1284 for (
int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1286 int2 cachedAreaIndex;
1287 cachedAreaIndex.x = i % cachedAreaDimen.x;
1288 cachedAreaIndex.y = i / cachedAreaDimen.x;
1290 int2 imagePixelIndex;
1291 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
1295 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1296 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1298 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
1302 for (
int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1303 filterCache[i] = filter[i];
1305 barrier(CLK_LOCAL_MEM_FENCE);
1309 imageIndex.x = imageAreaOrg.x + get_local_id(0);
1310 imageIndex.y = imageAreaOrg.y + get_local_id(1);
1313 if (imageIndex.x >= imageWidth
1314 || imageIndex.y >= imageHeight) {
1318 int filterIndex = 0;
1319 float4 sum = (float4)0.0f;
1321 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1322 int cacheIndexY = get_local_id(1);
1323 for (
int j = 0; j < filterHeight; j++) {
1324 int cacheIndexX = get_local_id(0);
1325 for (
int i = 0; i < filterWidth; i++) {
1326 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1327 float f = filterCache[filterIndex];
1342 int cacheIndexY = get_local_id(1);
1343 for (
int j = 0; j < filterHeight; j++) {
1344 int cacheIndexX = get_local_id(0);
1345 for (
int i = 0; i < filterWidth; i++) {
1347 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1349 float f = filterCache[filterIndex];
1350 float g = alpha * f;
1364 sum.xyz = gamma*sum.xyz;
1366 CLPixelType outputPixel;
1372 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1378 void Convolve(
const __global CLPixelType *input, __global CLPixelType *output,
1379 const uint imageWidth,
const uint imageHeight,
1380 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1384 imageIndex.x = get_global_id(0);
1385 imageIndex.y = get_global_id(1);
1391 if (imageIndex.x >= imageWidth
1392 || imageIndex.y >= imageHeight)
1395 int2 midFilterDimen;
1396 midFilterDimen.x = (filterWidth-1)/2;
1397 midFilterDimen.y = (filterHeight-1)/2;
1399 int filterIndex = 0;
1400 float4 sum = (float4)0.0f;
1402 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1403 for (
int j = 0; j < filterHeight; j++) {
1404 int2 inputPixelIndex;
1405 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1406 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1407 for (
int i = 0; i < filterWidth; i++) {
1408 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1409 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1411 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1412 float f = filter[filterIndex];
1427 for (
int j = 0; j < filterHeight; j++) {
1428 int2 inputPixelIndex;
1429 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1430 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1431 for (
int i = 0; i < filterWidth; i++) {
1432 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1433 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1435 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1437 float f = filter[filterIndex];
1438 float g = alpha * f;
1452 sum.xyz = gamma*sum.xyz;
1455 CLPixelType outputPixel;
1461 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
1479 __kernel
void HullPass1(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1480 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1481 ,
const int2 offset,
const int polarity,
const int matte) {
1483 int x = get_global_id(0);
1484 int y = get_global_id(1);
1486 CLPixelType v = inputImage[y*imageWidth+x];
1489 neighbor.y = y + offset.y;
1490 neighbor.x = x + offset.x;
1492 int2 clampedNeighbor;
1493 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1494 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1496 CLPixelType r = (clampedNeighbor.x == neighbor.x
1497 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1513 \n #pragma unroll 4\n
1514 for (
unsigned int i = 0; i < 4; i++) {
1515 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1519 \n #pragma unroll 4\n
1520 for (
unsigned int i = 0; i < 4; i++) {
1521 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1526 v.x = (CLQuantum)sv[0];
1527 v.y = (CLQuantum)sv[1];
1528 v.z = (CLQuantum)sv[2];
1531 v.w = (CLQuantum)sv[3];
1533 outputImage[y*imageWidth+x] = v;
1542 __kernel
void HullPass2(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1543 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1544 ,
const int2 offset,
const int polarity,
const int matte) {
1546 int x = get_global_id(0);
1547 int y = get_global_id(1);
1549 CLPixelType v = inputImage[y*imageWidth+x];
1551 int2 neighbor, clampedNeighbor;
1553 neighbor.y = y + offset.y;
1554 neighbor.x = x + offset.x;
1555 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1556 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1558 CLPixelType r = (clampedNeighbor.x == neighbor.x
1559 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1563 neighbor.y = y - offset.y;
1564 neighbor.x = x - offset.x;
1565 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1566 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1568 CLPixelType s = (clampedNeighbor.x == neighbor.x
1569 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1592 \n #pragma unroll 4\n
1593 for (
unsigned int i = 0; i < 4; i++) {
1598 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1602 \n #pragma unroll 4\n
1603 for (
unsigned int i = 0; i < 4; i++) {
1607 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1611 v.x = (CLQuantum)sv[0];
1612 v.y = (CLQuantum)sv[1];
1613 v.z = (CLQuantum)sv[2];
1616 v.w = (CLQuantum)sv[3];
1618 outputImage[y*imageWidth+x] = v;
1638 __kernel
void Equalize(__global CLPixelType * restrict im,
1640 __global CLPixelType * restrict equalize_map,
1641 const float4 white,
const float4 black)
1643 const int x = get_global_id(0);
1644 const int y = get_global_id(1);
1645 const int columns = get_global_size(0);
1646 const int c = x + y * columns;
1649 CLPixelType oValue, eValue;
1650 CLQuantum red, green, blue, alpha;
1655 if ((channel & SyncChannels) != 0)
1657 if (getRedF4(white) != getRedF4(black))
1659 ePos = ScaleQuantumToMap(getRed(oValue));
1660 eValue = equalize_map[ePos];
1661 red = getRed(eValue);
1662 ePos = ScaleQuantumToMap(getGreen(oValue));
1663 eValue = equalize_map[ePos];
1664 green = getRed(eValue);
1665 ePos = ScaleQuantumToMap(getBlue(oValue));
1666 eValue = equalize_map[ePos];
1667 blue = getRed(eValue);
1668 ePos = ScaleQuantumToMap(getAlpha(oValue));
1669 eValue = equalize_map[ePos];
1670 alpha = getRed(eValue);
1673 im[c]=(CLPixelType)(blue, green, red, alpha);
1701 const unsigned int number_parameters,__constant
float *parameters)
1703 float result = 0.0f;
1709 for (
unsigned int i=0; i < number_parameters; i++)
1716 float freq,phase,ampl,bias;
1717 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1718 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
1719 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
1720 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1727 float width,range,center,bias;
1728 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1729 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1730 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1731 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1734 result = range/
MagickPI*asin(result)+bias;
1735 result = ( result <= -1.0f ) ? bias - range/2.0f : result;
1736 result = ( result >= 1.0f ) ? bias + range/2.0f : result;
1742 float slope,range,center,bias;
1743 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1744 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1745 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1746 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1766 __kernel
void ComputeFunction(__global CLQuantum *image,
const unsigned int number_channels,
1768 __constant
float *parameters)
1770 const unsigned int x = get_global_id(0);
1771 const unsigned int y = get_global_id(1);
1772 const unsigned int columns = get_global_size(0);
1773 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1780 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
1782 if ((channel & RedChannel) != 0)
1783 red=
ApplyFunction(red,
function, number_parameters, parameters);
1785 if (number_channels > 2)
1787 if ((channel & GreenChannel) != 0)
1788 green=
ApplyFunction(green,
function, number_parameters, parameters);
1790 if ((channel & BlueChannel) != 0)
1791 blue=
ApplyFunction(blue,
function, number_parameters, parameters);
1794 if (((number_channels == 4) || (number_channels == 2)) &&
1795 ((channel & AlphaChannel) != 0))
1796 alpha=
ApplyFunction(alpha,
function, number_parameters, parameters);
1798 WriteChannels(p, number_channels, channel, red, green, blue, alpha);
1815 __kernel
void Grayscale(__global CLQuantum *image,
const int number_channels,
1816 const unsigned int colorspace,
const unsigned int method)
1818 const unsigned int x = get_global_id(0);
1819 const unsigned int y = get_global_id(1);
1820 const unsigned int columns = get_global_size(0);
1821 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1829 green=getPixelGreen(p);
1830 blue=getPixelBlue(p);
1834 setPixelRed(p,intensity);
1835 setPixelGreen(p,intensity);
1836 setPixelBlue(p,intensity);
1854 __kernel
void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *tmpImage,
1856 const int imageWidth,
1857 const int imageHeight)
1859 const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
1861 int x = get_local_id(0);
1862 int y = get_global_id(1);
1864 if ((x >= imageWidth) || (y >= imageHeight))
1867 global CLPixelType *src = srcImage + y * imageWidth;
1869 for (
int i = x; i < imageWidth; i += get_local_size(0)) {
1871 float weight = 1.0f;
1874 while ((j + 7) < i) {
1875 for (
int k = 0; k < 8; ++k)
1876 sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
1881 sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
1886 while ((j + 7) < radius + i) {
1887 for (
int k = 0; k < 8; ++k)
1888 sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
1892 while (j < radius + i) {
1893 sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
1898 tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
1904 __kernel
void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *blurImage,
1906 const float strength,
1907 const int imageWidth,
1908 const int imageHeight)
1910 const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
1912 int x = get_global_id(0);
1913 int y = get_global_id(1);
1915 if ((x >= imageWidth) || (y >= imageHeight))
1918 global
float *src = blurImage + x;
1921 float weight = 1.0f;
1924 while ((j + 7) < y) {
1925 for (
int k = 0; k < 8; ++k)
1926 sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
1931 sum += weight * src[mirrorBottom(j) * imageWidth];
1936 while ((j + 7) < radius + y) {
1937 for (
int k = 0; k < 8; ++k)
1938 sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
1942 while (j < radius + y) {
1943 sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
1948 CLPixelType pixel = srcImage[x + y * imageWidth];
1949 float srcVal = dot(RGB, convert_float4(pixel));
1950 float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
1951 mult = (srcVal + mult) / srcVal;
1957 dstImage[x + y * imageWidth] = pixel;
1975 static inline void ConvertRGBToHSL(
const CLQuantum red,
const CLQuantum green,
const CLQuantum blue,
1976 float *hue,
float *saturation,
float *lightness)
1991 *lightness=(tmax+tmin)/2.0;
2012 if (*lightness <= 0.5)
2013 *saturation=c/(2.0*(*lightness));
2015 *saturation=c/(2.0-2.0*(*lightness));
2018 static inline void ConvertHSLToRGB(
const float hue,
const float saturation,
const float lightness,
2019 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2034 if (lightness <= 0.5)
2035 c=2.0*lightness*saturation;
2037 c=(2.0-2.0*lightness)*saturation;
2038 tmin=lightness-0.5*c;
2039 h-=360.0*floor(h/360.0);
2041 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
2042 switch ((
int) floor(h) % 6)
2098 static inline void ModulateHSL(
const float percent_hue,
const float percent_saturation,
const float percent_lightness,
2099 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2110 hue+=0.5*(0.01*percent_hue-1.0);
2115 saturation*=0.01*percent_saturation;
2116 lightness*=0.01*percent_lightness;
2120 __kernel
void Modulate(__global CLPixelType *im,
2121 const float percent_brightness,
2122 const float percent_hue,
2123 const float percent_saturation,
2124 const int colorspace)
2127 const int x = get_global_id(0);
2128 const int y = get_global_id(1);
2129 const int columns = get_global_size(0);
2130 const int c = x + y * columns;
2132 CLPixelType pixel = im[c];
2140 green=getGreen(pixel);
2141 blue=getBlue(pixel);
2148 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
2149 &red, &green, &blue);
2154 CLPixelType filteredPixel;
2156 setRed(&filteredPixel, red);
2157 setGreen(&filteredPixel, green);
2158 setBlue(&filteredPixel, blue);
2159 filteredPixel.w = pixel.w;
2161 im[c] = filteredPixel;
/*
  MotionBlur: for the pixel at (get_global_id(0), get_global_id(1)) sums
  `width` samples of `input`, taken at currentPixel + offset[i] and weighted
  by filter[i], and writes the result to the same position in `output`.

    input/output  - source and destination pixel buffers, imageWidth pixels
                    per row.
    filter        - `width` tap weights.
    offset        - `width` per-tap (x,y) displacements.
    channel/matte - select between the plain and the alpha-weighted sum.
*/
2179 void MotionBlur(
const __global CLPixelType *input, __global CLPixelType *output,
2180 const unsigned int imageWidth,
const unsigned int imageHeight,
2181 const __global
float *filter,
const unsigned int width,
const __global int2* offset,
2183 const ChannelType channel,
const unsigned int matte) {
2186 currentPixel.x = get_global_id(0);
2187 currentPixel.y = get_global_id(1);
/*
  Bounds test against the image size; the statement it guards is not part of
  this listing (presumably an early return - TODO confirm).
*/
2189 if (currentPixel.x >= imageWidth
2190 || currentPixel.y >= imageHeight)
/* The accumulator starts from bias, which is declared on a line not shown. */
2194 pixel.x = (float)bias.x;
2195 pixel.y = (
float)bias.y;
2196 pixel.z = (float)bias.z;
2197 pixel.w = (
float)bias.w;
/* Plain path: AlphaChannel is not selected in channel, or matte is 0. */
2199 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
2201 for (
int i = 0; i < width; i++) {
/* Sample position is clamped into the image by ClampToCanvas(). */
2204 int2 samplePixel = currentPixel + offset[i];
2205 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2206 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2207 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2209 pixel.x += (filter[i] * (float)samplePixelValue.x);
2210 pixel.y += (filter[i] * (float)samplePixelValue.y);
2211 pixel.z += (filter[i] * (float)samplePixelValue.z);
2212 pixel.w += (filter[i] * (float)samplePixelValue.w);
/* Conversion of the accumulator into outputPixel is on lines not shown. */
2215 CLPixelType outputPixel;
2220 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
/*
  Second accumulation loop (the keyword separating it from the first is not
  in this listing; presumably the else branch).  Every tap is additionally
  scaled by alpha, whose definition is on a line not shown.
*/
2225 for (
int i = 0; i < width; i++) {
2228 int2 samplePixel = currentPixel + offset[i];
2229 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2230 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2232 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2235 float k = filter[i];
2236 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2237 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2238 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
2240 pixel.w += k * alpha * samplePixelValue.w;
/* Colour components are rescaled by gamma (computed on lines not shown). */
2245 pixel.xyz = gamma*pixel.xyz;
2247 CLPixelType outputPixel;
2252 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
/*
  BoxResizeFilter: weighting function taking a float x and returning a float.
  Its body is not part of this listing, so the returned value cannot be
  documented from here.
*/
2271 float BoxResizeFilter(
const float x)
/*
  CubicBC: evaluates one of two polynomials in x whose coefficients are
  resizeFilterCoefficients[0..6].  The conditions that choose between the two
  return statements are not present in this listing.
*/
2279 float CubicBC(
const float x,
const __global
float* resizeFilterCoefficients)
/* c[0] + c[1]*x^2 + c[2]*x^3 (no linear term). */
2311 return(resizeFilterCoefficients[0]+x*(x*
2312 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
/* c[3] + c[4]*x + c[5]*x^2 + c[6]*x^3. */
2314 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2315 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
/*
  Sinc: returns sinpi(x)/(MagickPI*x), i.e. sin(pi*x)/(pi*x).
  NOTE(review): the visible expression divides by zero at x == 0; any guard
  for that case would be on lines that are not part of this listing - confirm.
*/
2321 float Sinc(
const float x)
2325 const float alpha=(float) (
MagickPI*x);
2326 return sinpi(x)/alpha;
/*
  Linear falloff: 1-x for x below 1, otherwise 0.  The enclosing function
  header is not part of this listing (presumably the Triangle weighting
  function - TODO confirm).
*/
2340 return ((x<1.0f)?(1.0f-x):0.0f);
/*
  Hann: cosine window, returns 0.5 + 0.5*cos(MagickPI*x).
*/
2346 float Hann(
const float x)
2352 const float cosine=cos((
MagickPI*x));
2353 return(0.5f+0.5f*cosine);
/*
  Cosine window 0.54 + 0.46*cos(MagickPI*x).  The enclosing function header
  is not part of this listing (presumably Hamming - TODO confirm).
*/
2364 const float cosine=cos((
MagickPI*x));
2365 return(0.54f+0.46f*cosine);
/*
  Two-term cosine window 0.34 + 0.5*c + 0.16*c*c with c = cos(MagickPI*x).
  The enclosing function header is not part of this listing (presumably
  Blackman - TODO confirm).
*/
2379 const float cosine=cos((
MagickPI*x));
2380 return(0.34f+cosine*(0.5f+cosine*0.16f));
/*
  applyResizeFilter: evaluates the weighting function selected by filterType
  at x.  Only two dispatch targets are visible in this listing -
  CubicBC(x, filterCoefficients) and BoxResizeFilter(x); the selection logic
  and any other cases are on lines not shown.
*/
2385 static inline float applyResizeFilter(
const float x,
const ResizeWeightingFunctionType filterType,
const __global
float* filterCoefficients)
2395 return CubicBC(x,filterCoefficients);
2397 return BoxResizeFilter(x);
/*
  getResizeFilterWeight: combines a window term and a filter term, both
  evaluated through applyResizeFilter(), into one tap weight for distance x.

  NOTE(review): resizeWindowType is used below, but its parameter declaration
  is not among the lines of this listing (the numbering skips one line in the
  parameter list) - confirm against the full file.
*/
2415 static inline float getResizeFilterWeight(
const __global
float* resizeFilterCubicCoefficients,
const ResizeWeightingFunctionType resizeFilterType
2417 ,
const float resizeFilterScale,
const float resizeWindowSupport,
const float resizeFilterBlur,
const float x)
/* Distance is made non-negative and divided by the blur factor. */
2420 float xBlur = fabs(x/resizeFilterBlur);
/* The body of this near-zero-support test is not part of this listing. */
2421 if (resizeWindowSupport < MagickEpsilon
/* Window term: the window function evaluated at xBlur*resizeFilterScale. */
2428 scale = resizeFilterScale;
2429 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
/* Final weight = window term * filter function evaluated at xBlur. */
2431 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
/*
  Start of a second string constant; the definitions that follow in this
  listing appear after it (presumably as further kernel source text, like
  accelerateKernels - TODO confirm).
*/
2438 const char *accelerateKernels2 =
/*
  getNumWorkItemsPerPixel: integer quotient numWorkItems / pixelPerWorkgroup.

  NOTE(review): the result is 0 when pixelPerWorkgroup > numWorkItems, and the
  division itself is undefined for pixelPerWorkgroup == 0.  Callers visible in
  this file use the result as a divisor, so they presumably guarantee neither
  case occurs - confirm on the host side.
*/
2442 static inline unsigned int getNumWorkItemsPerPixel(
const unsigned int pixelPerWorkgroup,
const unsigned int numWorkItems) {
2443 return (numWorkItems/pixelPerWorkgroup);
/*
  pixelToCompute: maps a work-item id to the index of the pixel it works on.
  Consecutive groups of numWorkItemsPerPixel work-items share one pixel; ids
  that would map at or beyond pixelPerWorkgroup get -1.  The return statement
  is not part of this listing (presumably returns pixelIndex).
*/
2448 static inline int pixelToCompute(
const unsigned itemID,
const unsigned int pixelPerWorkgroup,
const unsigned int numWorkItems) {
2449 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
2450 int pixelIndex = itemID/numWorkItemsPerPixel;
2451 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
/*
  ResizeHorizontalFilter: resamples one image row along x.  The work-group
  size is fixed at 256x1x1; each work-group handles output columns
  [startX, stopX) of row y = get_global_id(1).

  Stages visible in this listing:
    1. Derive scale/support from xFactor and resizeFilterSupport, then the
       input column range [cacheRangeStartX, cacheRangeEndX) to cache.
    2. Copy that row segment into inputImageCache with async_work_group_copy.
    3. Walk the output columns in chunks of pixelChunkSize.  Within a chunk
       the work-items are divided among the output pixels (pixelToCompute)
       and each one accumulates a slice of that pixel's filter taps.
    4. Combine the partial sums in outputPixelCache / densityCache /
       gammaCache; the first actualNumPixelInThisChunk work-items then finish
       their pixel and call WriteAllChannels().

  Several locals used below (bisect, alpha, gamma) are declared on lines that
  are not part of this listing.
*/
2458 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2459 void ResizeHorizontalFilter(
const __global CLQuantum *inputImage,
const unsigned int number_channels,
2460 const unsigned int inputColumns,
const unsigned int inputRows, __global CLQuantum *filteredImage,
2461 const unsigned int filteredColumns,
const unsigned int filteredRows,
const float xFactor,
2462 const int resizeFilterType,
const int resizeWindowType,
const __global
float *resizeFilterCubicCoefficients,
2463 const float resizeFilterScale,
const float resizeFilterSupport,
const float resizeFilterWindowSupport,
2464 const float resizeFilterBlur, __local CLQuantum *inputImageCache,
const int numCachedPixels,
2465 const unsigned int pixelPerWorkgroup,
const unsigned int pixelChunkSize,
2466 __local float4 *outputPixelCache, __local
float *densityCache, __local
float *gammaCache)
/* Output columns owned by this work-group, clipped to the output width. */
2469 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2470 const unsigned int stopX =
MagickMin(startX + pixelPerWorkgroup,filteredColumns);
2471 const unsigned int actualNumPixelToCompute = stopX - startX;
/* scale is at least 1; support is at least 0.5. */
2474 float scale =
MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
2475 const float support =
MagickMax(scale*resizeFilterSupport,0.5f);
/* Input columns to cache: at most numCachedPixels, clipped to the row. */
2478 const int cacheRangeStartX =
MagickMax((
int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(
int)(0));
2479 const int cacheRangeEndX =
MagickMin((
int)(cacheRangeStartX + numCachedPixels), (
int)inputColumns);
/* Copy the interleaved row segment into local memory and wait for it. */
2482 const unsigned int y = get_global_id(1);
2483 const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y);
2484 const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels;
2485 event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0);
2486 wait_group_events(1, &e);
/*
  alpha_index is the last channel when number_channels is 2 or 4, otherwise 0;
  0 disables the alpha handling below.
*/
2488 unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
/* Ceiling division: number of chunks needed to cover the owned columns. */
2489 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2490 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2492 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2493 const unsigned int chunkStopX =
MagickMin(chunkStartX + pixelChunkSize, stopX);
2494 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
/* numItems = work-items cooperating on one output pixel of this chunk. */
2497 const unsigned int itemID = get_local_id(0);
2498 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2500 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2502 float4 filteredPixel = (float4)0.0f;
2503 float density = 0.0f;
/* Work-items with pixelIndex == -1 have no pixel in this chunk. */
2506 if (pixelIndex != -1)
2509 const int x = chunkStartX + pixelIndex;
/* Tap window [start, stop) around bisect, clipped to the input row. */
2513 const unsigned int start = (
unsigned int)
MagickMax(bisect-support+0.5f,0.0f);
2514 const unsigned int stop = (
unsigned int)
MagickMin(bisect+support+0.5f,(
float)inputColumns);
2515 const unsigned int n = stop - start;
/* Ceiling division: taps handled by each cooperating work-item. */
2518 unsigned int numStepsPerWorkItem = n / numItems;
2519 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2521 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2524 const unsigned int stopStep =
MagickMin(startStep+numStepsPerWorkItem, n);
/* cacheIndex is relative to the first cached column. */
2526 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
2527 for (
unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
2529 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2532 resizeFilterScale, resizeFilterWindowSupport,
2533 resizeFilterBlur, scale*(start + i - bisect + 0.5));
2535 float4 cp = (float4)0.0f;
/* Cached pixels are interleaved: number_channels values per pixel. */
2537 __local CLQuantum *p = inputImageCache + (cacheIndex*number_channels);
2538 cp.x = (float) *(p);
2539 if (number_channels > 2)
2541 cp.y = (float) *(p + 1);
2542 cp.z = (float) *(p + 2);
2545 if (alpha_index != 0)
2547 cp.w = (float) *(p + alpha_index);
/* Alpha path: colour weighted by alpha, the alpha channel by weight. */
2551 filteredPixel.x += alpha * cp.x;
2552 filteredPixel.y += alpha * cp.y;
2553 filteredPixel.z += alpha * cp.z;
2554 filteredPixel.w += weight * cp.w;
/* Non-alpha path: all components weighted by weight. */
2558 filteredPixel += ((float4) weight)*cp;
/* Clear the per-pixel reduction slots before partial sums are added. */
2566 if (itemID < actualNumPixelInThisChunk) {
2567 outputPixelCache[itemID] = (float4)0.0f;
2568 densityCache[itemID] = 0.0f;
2569 if (alpha_index != 0)
2570 gammaCache[itemID] = 0.0f;
2572 barrier(CLK_LOCAL_MEM_FENCE);
/*
  Serialized reduction: in round i only work-items with itemID%numItems == i
  add their partial sums to their pixel's slot.  numItems depends only on the
  chunk size and get_local_size(0), so every work-item runs the same number
  of rounds.
*/
2575 for (
unsigned int i = 0; i < numItems; i++) {
2576 if (pixelIndex != -1) {
2577 if (itemID%numItems == i) {
2578 outputPixelCache[pixelIndex]+=filteredPixel;
2579 densityCache[pixelIndex]+=density;
2580 if (alpha_index != 0)
2581 gammaCache[pixelIndex]+=gamma;
2584 barrier(CLK_LOCAL_MEM_FENCE);
/* Finalization: one work-item per output pixel of the chunk. */
2587 if (itemID < actualNumPixelInThisChunk)
2589 float4 filteredPixel = outputPixelCache[itemID];
2592 if (alpha_index != 0)
2593 gamma = gammaCache[itemID];
/*
  A density other than 0 or 1 rescales the sums; the statement between this
  test and the multiply is not part of this listing.
*/
2595 float density = densityCache[itemID];
2596 if ((density != 0.0f) && (density != 1.0f))
2599 filteredPixel *= (float4) density;
2600 if (alpha_index != 0)
2604 if (alpha_index != 0)
2607 filteredPixel.x *= gamma;
2608 filteredPixel.y *= gamma;
2609 filteredPixel.z *= gamma;
2612 WriteAllChannels(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, filteredPixel);
/*
  ResizeVerticalFilter: resamples one image column along y.  The work-group
  size is fixed at 1x256x1; each work-group handles output rows
  [startY, stopY) of column x = get_global_id(0).

  Stages visible in this listing:
    1. Derive scale/support from yFactor and resizeFilterSupport, then the
       input row range [cacheRangeStartY, cacheRangeEndY) to cache.
    2. Copy that column segment into inputImageCache, one strided copy per
       channel, so the cache holds rangeLength values of channel 0, then
       rangeLength values of channel 1, and so on.
    3. Walk the output rows in chunks of pixelChunkSize.  Within a chunk the
       work-items are divided among the output pixels (pixelToCompute) and
       each one accumulates a slice of that pixel's filter taps.
    4. Combine the partial sums in outputPixelCache / densityCache /
       gammaCache; the first actualNumPixelInThisChunk work-items then finish
       their pixel and call WriteAllChannels().

  Several locals used below (bisect, alpha, gamma) are declared on lines that
  are not part of this listing.
*/
2620 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2621 void ResizeVerticalFilter(const __global CLQuantum *inputImage, const
unsigned int number_channels,
2622 const
unsigned int inputColumns, const
unsigned int inputRows, __global CLQuantum *filteredImage,
2623 const
unsigned int filteredColumns, const
unsigned int filteredRows, const
float yFactor,
2624 const
int resizeFilterType, const
int resizeWindowType, const __global
float *resizeFilterCubicCoefficients,
2625 const
float resizeFilterScale, const
float resizeFilterSupport, const
float resizeFilterWindowSupport,
2626 const
float resizeFilterBlur, __local CLQuantum *inputImageCache, const
int numCachedPixels,
2627 const
unsigned int pixelPerWorkgroup, const
unsigned int pixelChunkSize,
2628 __local float4 *outputPixelCache, __local
float *densityCache, __local
float *gammaCache)
/* Output rows owned by this work-group, clipped to the output height. */
2631 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2632 const unsigned int stopY =
MagickMin(startY + pixelPerWorkgroup,filteredRows);
2633 const unsigned int actualNumPixelToCompute = stopY - startY;
/* scale is at least 1; support is at least 0.5. */
2636 float scale =
MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
2637 const float support =
MagickMax(scale*resizeFilterSupport,0.5f);
/* Input rows to cache: at most numCachedPixels, clipped to the column. */
2640 const int cacheRangeStartY =
MagickMax((
int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(
int)(0));
2641 const int cacheRangeEndY =
MagickMin((
int)(cacheRangeStartY + numCachedPixels), (
int)inputRows);
/*
  One strided copy per channel; stride is one full input row
  (inputColumns * number_channels), destination is plane i of the cache.
*/
2644 const unsigned int x = get_global_id(0);
2645 unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY);
2646 unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY;
2647 unsigned int stride = inputColumns * number_channels;
2648 for (
unsigned int i = 0; i < number_channels; i++)
2650 event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0);
2651 wait_group_events(1,&e);
/*
  alpha_index is the last channel when number_channels is 2 or 4, otherwise 0;
  0 disables the alpha handling below.
*/
2654 unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
/* Ceiling division: number of chunks needed to cover the owned rows. */
2655 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2656 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2658 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2659 const unsigned int chunkStopY =
MagickMin(chunkStartY + pixelChunkSize, stopY);
2660 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
/* numItems = work-items cooperating on one output pixel of this chunk. */
2663 const unsigned int itemID = get_local_id(1);
2664 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2666 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2668 float4 filteredPixel = (float4)0.0f;
2669 float density = 0.0f;
/* Work-items with pixelIndex == -1 have no pixel in this chunk. */
2672 if (pixelIndex != -1)
2675 const int y = chunkStartY + pixelIndex;
/* Tap window [start, stop) around bisect, clipped to the input column. */
2679 const unsigned int start = (
unsigned int)
MagickMax(bisect-support+0.5f,0.0f);
2680 const unsigned int stop = (
unsigned int)
MagickMin(bisect+support+0.5f,(
float)inputRows);
2681 const unsigned int n = stop - start;
/* Ceiling division: taps handled by each cooperating work-item. */
2684 unsigned int numStepsPerWorkItem = n / numItems;
2685 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2687 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2690 const unsigned int stopStep =
MagickMin(startStep+numStepsPerWorkItem, n);
/* cacheIndex is relative to the first cached row. */
2692 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
2693 for (
unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
2695 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2698 resizeFilterScale, resizeFilterWindowSupport,
2699 resizeFilterBlur, scale*(start + i - bisect + 0.5));
2701 float4 cp = (float4)0.0f;
/* Channel k of a cached pixel lives rangeLength*k elements further on. */
2703 __local CLQuantum *p = inputImageCache + cacheIndex;
2704 cp.x = (float) *(p);
2705 if (number_channels > 2)
2707 cp.y = (float) *(p + rangeLength);
2708 cp.z = (float) *(p + (rangeLength * 2));
2711 if (alpha_index != 0)
2713 cp.w = (float) *(p + (rangeLength * alpha_index));
/* Alpha path: colour weighted by alpha, the alpha channel by weight. */
2717 filteredPixel.x += alpha * cp.x;
2718 filteredPixel.y += alpha * cp.y;
2719 filteredPixel.z += alpha * cp.z;
2720 filteredPixel.w += weight * cp.w;
/* Non-alpha path: all components weighted by weight. */
2724 filteredPixel += ((float4) weight)*cp;
/* Clear the per-pixel reduction slots before partial sums are added. */
2732 if (itemID < actualNumPixelInThisChunk) {
2733 outputPixelCache[itemID] = (float4)0.0f;
2734 densityCache[itemID] = 0.0f;
2735 if (alpha_index != 0)
2736 gammaCache[itemID] = 0.0f;
2738 barrier(CLK_LOCAL_MEM_FENCE);
/*
  Serialized reduction: in round i only work-items with itemID%numItems == i
  add their partial sums to their pixel's slot.  numItems depends only on the
  chunk size and get_local_size(1), so every work-item runs the same number
  of rounds.
*/
2741 for (
unsigned int i = 0; i < numItems; i++) {
2742 if (pixelIndex != -1) {
2743 if (itemID%numItems == i) {
2744 outputPixelCache[pixelIndex]+=filteredPixel;
2745 densityCache[pixelIndex]+=density;
2746 if (alpha_index != 0)
2747 gammaCache[pixelIndex]+=gamma;
2750 barrier(CLK_LOCAL_MEM_FENCE);
/* Finalization: one work-item per output pixel of the chunk. */
2753 if (itemID < actualNumPixelInThisChunk)
2755 float4 filteredPixel = outputPixelCache[itemID];
2758 if (alpha_index != 0)
2759 gamma = gammaCache[itemID];
/*
  A density other than 0 or 1 rescales the sums; the statement between this
  test and the multiply is not part of this listing.
*/
2761 float density = densityCache[itemID];
2762 if ((density != 0.0f) && (density != 1.0f))
2765 filteredPixel *= (float4) density;
2766 if (alpha_index != 0)
2770 if (alpha_index != 0)
2773 filteredPixel.x *= gamma;
2774 filteredPixel.y *= gamma;
2775 filteredPixel.z *= gamma;
2778 WriteAllChannels(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, filteredPixel);
/*
  RotationalBlur: for the pixel at (x, y) accumulates samples of `image`
  taken at positions obtained by rotating (x, y) about blurCenter with the
  precomputed cos_theta[i] / sin_theta[i] pairs, then writes the normalized
  result with WriteFloat4().

    image, filteredImage - source and destination buffers; the image size is
                           taken from the launch size (get_global_size).
    blurCenter           - centre of rotation.
    cos_theta, sin_theta - cossin_theta_size precomputed rotation terms.
    channel              - forwarded to WriteFloat4().
*/
2797 __kernel
void RotationalBlur(
const __global CLQuantum *image,
2798 const unsigned int number_channels,
const unsigned int channel,
2799 const float2 blurCenter,__constant
float *cos_theta,
2800 __constant
float *sin_theta,
const unsigned int cossin_theta_size,
2801 __global CLQuantum *filteredImage)
2803 const int x = get_global_id(0);
2804 const int y = get_global_id(1);
2805 const int columns = get_global_size(0);
2806 const int rows = get_global_size(1);
2807 unsigned int step = 1;
/* Offset of this pixel from the centre, and its distance (radius). */
2808 float center_x = (float) x - blurCenter.x;
2809 float center_y = (
float) y - blurCenter.y;
2810 float radius = hypot(center_x, center_y);
2812 float blur_radius = hypot(blurCenter.x, blurCenter.y);
/*
  Pixels closer to the centre get a larger step through the angle table, so
  fewer samples are taken for them.

  NOTE(review): step is the increment of the sampling loop below.  If
  cossin_theta_size is 1 the clamp sets step to 0, and a zero quotient would
  do the same unless a guard exists on the lines missing from this listing;
  either would make the loop never advance - confirm.
*/
2814 if (radius > MagickEpsilon)
2816 step = (
unsigned int) (blur_radius / radius);
2819 if (step >= cossin_theta_size)
2820 step = cossin_theta_size-1;
2823 float4 result = 0.0f;
2824 float normalize = 0.0f;
2827 for (
unsigned int i=0; i<cossin_theta_size; i+=step)
/*
  Rotate the offset, add 0.5 and clamp to the canvas; the float argument is
  converted to int at the ClampToCanvas(const int, const int) call.
*/
2829 int cx = ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns);
2830 int cy = ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f,rows);
2832 float4 pixel = ReadAllChannels(image, number_channels, columns, cx, cy);
/* 2- and 4-channel images: colour is weighted by alpha (defined on a line not shown). */
2834 if ((number_channels == 4) || (number_channels == 2))
2840 result.x += alpha * pixel.x;
2841 result.y += alpha * pixel.y;
2842 result.z += alpha * pixel.z;
2843 result.w += pixel.w;
/* Normalization; how normalize is accumulated is on lines not shown. */
2853 if ((number_channels == 4) || (number_channels == 2))
2859 result.w *= normalize;
2862 result *= normalize;
2864 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
/*
  UnsharpMaskBlurColumn: vertical blur pass plus unsharp-mask combine for
  column groupX.  The work-group caches the column of blurRowData it needs
  (its own rows plus `radius` rows above and below) in cachedData and the
  filter taps in cachedFilter, convolves, and then writes either the original
  pixel or original + gain * (original - blurred) through WriteFloat4().

    image          - original image, read with ReadFloat4().
    blurRowData    - float4 data, `columns` entries per row, read as the
                     input of this pass.
    cachedData     - local scratch for the cached column segment.
    cachedFilter   - local copy of `filter` (width taps).
    gain/threshold - sharpening strength and threshold; threshold itself is
                     not referenced on any visible line, the comparison uses
                     quantumThreshold, which is defined on a line not shown.
*/
2881 __kernel
void UnsharpMaskBlurColumn(
const __global CLQuantum* image,
2882 const __global float4 *blurRowData,
const unsigned int number_channels,
2883 const ChannelType channel,
const unsigned int columns,
2884 const unsigned int rows,__local float4* cachedData,
2885 __local
float* cachedFilter,
const __global
float *filter,
2886 const unsigned int width,
const float gain,
const float threshold,
2887 __global CLQuantum *filteredImage)
2889 const unsigned int radius = (width-1)/2;
/* Rows needed by this work-group: its own rows extended by radius each way. */
2892 const int groupX = get_group_id(0);
2893 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
2894 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
/* Fast path: the whole range lies inside the image, so use a strided copy. */
2896 if ((groupStartY >= 0) && (groupStopY < rows))
2898 event_t e = async_work_group_strided_copy(cachedData,
2899 blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0);
2900 wait_group_events(1,&e);
/* Otherwise: manual loads with the row index clamped by ClampToCanvas(). */
2904 for (
int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1))
2905 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX];
2907 barrier(CLK_LOCAL_MEM_FENCE);
2910 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
2911 wait_group_events(1,&e);
2914 const int cy = get_global_id(1);
2918 float4 blurredPixel = (float4) 0.0f;
/*
  NOTE(review): the inner loop increments both j and i while indexing with
  i+j, so the index advances by 2 per iteration (i0, i0+2, ..., i0+14).  Odd
  taps are skipped and the index can exceed width-1 although only
  i+7 < width was checked.  The tail loop below indexes by i alone, which
  suggests the unrolled body should use either i or a fixed base plus j, not
  both - confirm and fix.
*/
2922 for ( ; i+7 < width; )
2924 for (
int j=0; j < 8; j++, i++)
2925 blurredPixel+=cachedFilter[i+j]*cachedData[i+j+get_local_id(1)];
/* Remaining taps, one at a time. */
2928 for ( ; i < width; i++)
2929 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
2931 float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel);
2932 float4 outputPixel = inputImagePixel - blurredPixel;
/*
  Components where |2*difference| is below quantumThreshold keep the input
  value; the others become input + difference * gain.
*/
2936 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
2937 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
2940 WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel);
/*
  UnsharpMask: two-pass blur and unsharp-mask combine in one kernel.

  Pass 1: for every row the work-group needs (its own rows extended by
  radius above and below) each work-item convolves horizontally around
  column x, reading `image` with mirrored coordinates, and stores the result
  in the local array `pixels`.
  Pass 2 (after a barrier): each work-item convolves vertically over
  `pixels`, then writes either the original pixel or
  original + gain * (original - blurred) through WriteFloat4().

  The loop counter i and quantumThreshold are declared on lines that are not
  part of this listing; `threshold` is not referenced on any visible line.
*/
2946 __kernel
void UnsharpMask(
const __global CLQuantum *image,
const unsigned int number_channels,
2947 const ChannelType channel,__constant
float *filter,
const unsigned int width,
2948 const unsigned int columns,
const unsigned int rows,__local float4 *pixels,
2949 const float gain,
const float threshold,__global CLQuantum *filteredImage)
2951 const unsigned int x = get_global_id(0);
2952 const unsigned int y = get_global_id(1);
2954 const unsigned int radius = (width - 1) / 2;
/* Each work-item starts at its own row minus radius and strides by the group height. */
2956 int row = y - radius;
2957 int baseRow = get_group_id(1) * get_local_size(1) - radius;
2958 int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
2960 while (row < endRow) {
/* Mirror rows outside the image: negative -> -row, beyond the end -> 2*rows - srcy - 1. */
2961 int srcy = (row < 0) ? -row : row;
2962 srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy;
2964 float4 value = 0.0f;
2966 int ix = x - radius;
/* Horizontal taps, eight at a time; tap i + j is paired with filter[i + j]. */
2969 while (i + 7 < width) {
2970 for (
int j = 0; j < 8; ++j) {
2972 srcx = (srcx < 0) ? -srcx : srcx;
2973 srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2974 value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
/* Remaining horizontal taps. */
2981 int srcx = (ix < 0) ? -ix : ix;
2982 srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2983 value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
/* Row-major store; the row pitch is the work-group width. */
2987 pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
2988 row += get_local_size(1);
/* All pass-1 results must be in local memory before pass 2 reads them. */
2991 barrier(CLK_LOCAL_MEM_FENCE);
2993 const int px = get_local_id(0);
2994 const int py = get_local_id(1);
2995 const int prp = get_local_size(0);
2996 float4 value = (float4)(0.0f);
/*
  NOTE(review): the unrolled loop multiplies all eight rows py + i + j by the
  same tap filter[i], whereas pass 1 above pairs tap i + j with
  filter[i + j].  This looks like it should be filter[i + j] - confirm.
*/
2999 while (i + 7 < width) {
3000 for (
int j = 0; j < 8; ++j)
3001 value += (float4)(filter[i]) * pixels[px + (py + i + j) * prp];
/* Remaining vertical taps. */
3005 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
/* Only work-items that map to a pixel inside the image write a result. */
3009 if ((x < columns) && (y < rows)) {
3010 float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel);
3011 float4 diff = srcPixel - value;
/*
  Components where |2*diff| is below quantumThreshold keep the source value;
  the others become source + diff * gain.
*/
3015 int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3016 value = select(srcPixel + diff * gain, srcPixel, mask);
3018 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value);
/*
  WaveletDenoise: work-group size fixed at 64x4x1.  Each work-group processes
  a 64x64 tile (tileSize) through the local array `buffer`; a work-item owns
  column get_local_id(0) and every 4th row starting at get_local_id(1), i.e.
  16 rows, which is why its private per-row storage is indexed with i / 4.

  Tiles advance by tileSize - 2*pad in both directions, so neighbouring tiles
  overlap by a border of pad pixels on each side; only the interior of a tile
  (excluding that border) is written to dstImage.

  Per channel and per pass the tile is smoothed horizontally and then
  vertically with the kernel 0.25 / 0.5 / 0.25 at distance radius = 1 << pass
  (mirrored at the tile edges); the difference between the pre-smoothing
  value and the smoothed value is thresholded against threshold * noise[pass]
  and accumulated.  The thresholding bodies and the declarations of tmp,
  accum and pixel are on lines not part of this listing.

  NOTE(review): stage[] has 48 entries and is indexed (i / 4) + 16 * channel,
  which only fits channel < 3, while the channel loops run to max_channels;
  noise[] has 8 entries and is indexed by pass; pad = 1 << (passes - 1) needs
  passes >= 1.  Presumably the host enforces max_channels <= 3 and
  1 <= passes <= 8 - confirm.
*/
3024 __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
3025 void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage,
3026 const
unsigned int number_channels,const
unsigned int max_channels,
3027 const
float threshold,const
int passes,const
unsigned int imageWidth,
3028 const
unsigned int imageHeight)
3030 const int pad = (1 << (passes - 1));
3031 const int tileSize = 64;
3032 const int tileRowPixels = 64;
3033 const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
3035 CLQuantum stage[48];
3037 local
float buffer[64 * 64];
/* Image coordinates of this work-item's column and of the tile's first row. */
3039 int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
3040 int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
/* Load this work-item's 16 pixels into stage[], mirroring coordinates outside the image. */
3042 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3043 int pos = (mirrorTop(mirrorBottom(srcx), imageWidth) * number_channels) +
3044 (mirrorTop(mirrorBottom(srcy + i), imageHeight)) * imageWidth * number_channels;
3046 for (
int channel = 0; channel < max_channels; ++channel)
3047 stage[(i / 4) + (16 * channel)] = srcImage[pos + channel];
/* Channels are processed one after another through the shared tile buffer. */
3050 for (
int channel = 0; channel < max_channels; ++channel) {
3052 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3053 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[(i / 4) + (16 * channel)]);
3061 for (
int i = 0; i < 16; i++)
3064 for (
int pass = 0; pass < passes; ++pass) {
3065 const int radius = 1 << pass;
3066 const int x = get_local_id(0);
3067 const float thresh = threshold * noise[pass];
/*
  Horizontal smoothing.  tmp[] keeps the value before smoothing; the barrier
  between the read and the write ensures neighbours are read before any
  work-item overwrites them.
*/
3070 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3071 const int offset = i * tileRowPixels;
3073 tmp[i / 4] = buffer[x + offset];
3074 pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
3075 barrier(CLK_LOCAL_MEM_FENCE);
3076 buffer[x + offset] = pixel;
3078 barrier(CLK_LOCAL_MEM_FENCE);
/* Vertical smoothing of the horizontally smoothed tile, then the detail term. */
3081 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3082 pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
3083 float delta = tmp[i / 4] - pixel;
/* delta is adjusted when it lies outside [-thresh, thresh]; the bodies are not shown here. */
3085 if (delta < -thresh)
3087 else if (delta > thresh)
3091 accum[i / 4] += delta;
3093 barrier(CLK_LOCAL_MEM_FENCE);
/* Not the last pass: tmp[] is written back to the tile for the next pass. */
3095 if (pass < passes - 1)
3096 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3097 buffer[x + i * tileRowPixels] = tmp[i / 4];
/* Otherwise (presumably the else branch; the keyword is not shown): add tmp[] to accum[]. */
3099 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3100 accum[i / 4] += tmp[i / 4];
3101 barrier(CLK_LOCAL_MEM_FENCE);
3104 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3107 barrier(CLK_LOCAL_MEM_FENCE);
/* Write back only tile-interior pixels that also lie inside the image. */
3112 if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
3113 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3114 if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
3115 int pos = (srcx * number_channels) + ((srcy + i) * (imageWidth * number_channels));
3116 for (
int channel = 0; channel < max_channels; ++channel) {
3117 dstImage[pos + channel] = stage[(i / 4) + (16 * channel)];
3127 #endif // MAGICKCORE_OPENCL_SUPPORT
3129 #if defined(__cplusplus) || defined(c_plusplus)
3133 #endif // MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
Definition: composite.h:84
Definition: composite.h:72
Definition: composite.h:78
Definition: resize-private.h:31
Definition: colorspace.h:57
Definition: composite.h:28
Definition: resize-private.h:37
Definition: visual-effects.h:34
MagickExport void ConvertRGBToHSL(const double red, const double green, const double blue, double *hue, double *saturation, double *lightness)
Definition: gem.c:1099
Definition: statistic.h:124
Definition: resize-private.h:33
Definition: composite.h:91
Definition: colorspace.h:50
Definition: composite.h:30
Definition: composite.h:68
Definition: colorspace.h:46
Definition: colorspace.h:53
Definition: composite.h:95
Definition: composite.h:33
static double Hann(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:322
Definition: resize-private.h:40
Definition: composite.h:96
Definition: resize-private.h:29
PixelIntensityMethod
Definition: pixel.h:96
Definition: composite.h:54
static double Blackman(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:149
Definition: colorspace.h:32
Definition: composite.h:64
Definition: composite.h:83
Definition: composite.h:27
Definition: colorspace.h:33
Definition: colorspace.h:58
Definition: composite.h:35
Definition: composite.h:52
#define MagickPI
Definition: image-private.h:40
Definition: colorspace.h:36
Definition: colorspace.h:44
Definition: colorspace.h:47
static void ModulateHSL(const double percent_hue, const double percent_saturation, const double percent_lightness, double *red, double *green, double *blue)
Definition: enhance.c:3525
Definition: statistic.h:127
Definition: colorspace.h:45
#define MAGICKCORE_QUANTUM_DEPTH
Definition: magick-type.h:32
Definition: composite.h:49
Definition: colorspace.h:55
NoiseType
Definition: visual-effects.h:27
Definition: resize-private.h:38
#define MagickEpsilon
Definition: magick-type.h:114
Definition: colorspace.h:42
static Quantum ClampToQuantum(const MagickRealType quantum)
Definition: quantum.h:85
Definition: statistic.h:125
MagickPrivate void ConvertRGBToHSB(const double, const double, const double, double *, double *, double *)
Definition: visual-effects.h:36
Definition: visual-effects.h:35
Definition: resize-private.h:30
Definition: colorspace.h:39
Definition: composite.h:47
Definition: resize-private.h:41
Definition: composite.h:89
Definition: composite.h:76
Definition: composite.h:88
Definition: colorspace.h:52
Definition: composite.h:42
Definition: colorspace.h:37
Definition: composite.h:48
Definition: colorspace.h:54
Definition: colorspace.h:34
static double PerceptibleReciprocal(const double x)
Definition: pixel-accessor.h:234
Definition: composite.h:50
Definition: composite.h:38
Definition: composite.h:82
Definition: composite.h:74
Definition: composite.h:87
Definition: resize-private.h:32
Definition: composite.h:51
Definition: composite.h:60
Definition: composite.h:85
static Quantum ApplyFunction(Quantum pixel, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:968
Definition: colorspace.h:59
Definition: composite.h:53
Definition: resize-private.h:36
Definition: colorspace.h:51
static void Contrast(const int sign, double *red, double *green, double *blue)
Definition: enhance.c:1371
#define SigmaMultiplicativeGaussian
Definition: visual-effects.h:30
Definition: composite.h:56
Definition: composite.h:46
Definition: statistic.h:123
Definition: composite.h:79
Definition: colorspace.h:40
Definition: composite.h:94
static double Triangle(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:543
Definition: resize-private.h:34
#define QuantumScale
Definition: magick-type.h:119
Definition: colorspace.h:41
Definition: resize-private.h:39
Definition: composite.h:71
ChannelType
Definition: pixel.h:33
Definition: colorspace.h:29
MagickExport void ConvertHSLToRGB(const double hue, const double saturation, const double lightness, double *red, double *green, double *blue)
Definition: gem.c:462
#define MaxMap
Definition: magick-type.h:79
static double RoundToUnity(const double value)
Definition: composite-private.h:47
#define MagickMax(x, y)
Definition: image-private.h:36
Definition: composite.h:67
Definition: composite.h:39
Definition: composite.h:45
Definition: composite.h:44
Definition: composite.h:86
Definition: resize-private.h:28
Definition: composite.h:97
Definition: composite.h:41
Definition: composite.h:59
Definition: colorspace.h:28
Definition: composite.h:93
Definition: colorspace.h:43
Definition: composite.h:70
Definition: composite.h:92
Definition: visual-effects.h:32
Definition: colorspace.h:48
Definition: resize-private.h:42
static double Sinc(const double, const ResizeFilter *)
Definition: composite.h:57
Definition: composite.h:36
Definition: composite.h:43
static double CubicBC(const double x, const ResizeFilter *resize_filter)
Definition: resize.c:207
Definition: visual-effects.h:31
Definition: composite.h:37
Definition: composite.h:66
Definition: statistic.h:126
ResizeWeightingFunctionType
Definition: resize-private.h:25
Definition: composite.h:98
Definition: colorspace.h:49
static double Hamming(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:334
#define MagickMin(x, y)
Definition: image-private.h:37
ColorspaceType
Definition: colorspace.h:25
Definition: composite.h:32
Definition: colorspace.h:30
Definition: composite.h:31
Definition: colorspace.h:35
Definition: composite.h:55
Definition: composite.h:75
Definition: colorspace.h:31
CompositeOperator
Definition: composite.h:25
Definition: composite.h:77
Definition: colorspace.h:60
Definition: colorspace.h:38
Definition: composite.h:80
Definition: composite.h:29
Definition: colorspace.h:56
MagickPrivate void ConvertHSBToRGB(const double, const double, const double, double *, double *, double *)
Definition: composite.h:62
Definition: composite.h:73
Definition: composite.h:63
Definition: composite.h:69
Definition: composite.h:34
Definition: visual-effects.h:29
Definition: resize-private.h:27
Definition: composite.h:90
Definition: colorspace.h:27
MagickExport MagickRealType GetPixelIntensity(const Image *magick_restrict image, const Quantum *magick_restrict pixel)
Definition: pixel.c:2358
MagickFunction
Definition: statistic.h:121
Definition: composite.h:40
Definition: composite.h:81
Definition: resize-private.h:35
#define QuantumRange
Definition: magick-type.h:87
Definition: composite.h:58
Definition: composite.h:65
Definition: composite.h:61
Definition: visual-effects.h:33