19 #ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
20 #define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
22 #if defined(__cplusplus) || defined(c_plusplus)
26 #if defined(MAGICKCORE_OPENCL_SUPPORT)
/*
  Stringification helpers used to assemble kernel source text.

  OPENCL_DEFINE, OPENCL_IF, OPENCL_ELIF, OPENCL_ELSE and OPENCL_ENDIF each
  expand to adjacent string literals that spell the matching preprocessor
  directive ("#define", "#if", "#elif", "#else", "#endif") surrounded by
  newlines, with the stringified macro arguments appended.  The "#" sits in
  its own literal next to the directive name.  STRINGIFY turns its arguments
  into one string literal followed by a newline.
*/
31 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
32 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
33 #define OPENCL_ELSE() "\n #""else " " \n"
34 #define OPENCL_ENDIF() "\n #""endif " " \n"
35 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
36 #define STRINGIFY(...) #__VA_ARGS__ "\n"
38 const char *accelerateKernels =
/*
  Emit "#define" lines for MagickMax(x,y) and MagickMin(x,y) into the kernel
  source.  Both are plain conditional-expression macros, so each argument
  may be evaluated twice.
*/
51 OPENCL_DEFINE(
MagickMax(x,y), (((x) > (y)) ? (x) : (y)))
52 OPENCL_DEFINE(
MagickMin(x,y), (((x) < (y)) ? (x) : (y)))
/*
  ScaleCharToQuantum: converts an 8-bit value to CLQuantum.

  Three alternative definitions are visible: an identity cast, a scale by
  257 (= 65535/255) and a scale by 16843009 (= 4294967295/255).
  NOTE(review): the conditionals that select exactly one of them are not
  visible in this excerpt; presumably one per quantum depth -- confirm.
*/
280 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
282 return((CLQuantum) value);
289 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
291 return((CLQuantum) (257.0f*value));
298 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
300 return((CLQuantum) (16843009.0*value));
306 OPENCL_IF((MAGICKCORE_HDRI_SUPPORT == 1))
320 return (CLQuantum) (clamp(value, 0.0f,
QuantumRange) + 0.5f);
/*
  ClampToCanvas: limits offset to the index interval [0, range-1] with
  clamp() and returns the result.
*/
327 static inline
int ClampToCanvas(const
int offset,const
int range)
329 return clamp(offset, (
int)0, range-1);
/*
  ScaleQuantumToMap: converts a quantum value to a map index.  A value at or
  above MaxMap yields MaxMap; the other visible return converts value to
  uint directly.
*/
334 static inline uint ScaleQuantumToMap(CLQuantum value)
336 if (value >= (CLQuantum)
MaxMap)
337 return ((uint)MaxMap);
339 return ((uint)value);
346 float sign = x < (float) 0.0 ? (
float) -1.0 : (float) 1.0;
/*
  getPixelIndex: element offset of the first channel of pixel (x,y) in an
  interleaved buffer that stores number_channels values per pixel and
  columns pixels per row.

    number_channels  values stored per pixel
    columns          pixels per row
    x, y             pixel coordinates

  Returns (y*columns+x)*number_channels.  The arithmetic is unsigned, so this
  equals the expanded form x*number_channels+y*columns*number_channels for
  every input, including wrap-around.
*/
static inline unsigned int getPixelIndex(const unsigned int number_channels,
  const unsigned int columns,const unsigned int x,const unsigned int y)
{
  return (y * columns + x) * number_channels;
}
359 static inline float getPixelRed(
const __global CLQuantum *p) {
return (
float)*p; }
360 static inline float getPixelGreen(
const __global CLQuantum *p) {
return (
float)*(p+1); }
361 static inline float getPixelBlue(
const __global CLQuantum *p) {
return (
float)*(p+2); }
362 static inline float getPixelAlpha(
const __global CLQuantum *p,
const unsigned int number_channels) {
return (
float)*(p+number_channels-1); }
364 static inline void setPixelRed(__global CLQuantum *p,
const CLQuantum value) { *p=value; }
365 static inline void setPixelGreen(__global CLQuantum *p,
const CLQuantum value) { *(p+1)=value; }
366 static inline void setPixelBlue(__global CLQuantum *p,
const CLQuantum value) { *(p+2)=value; }
367 static inline void setPixelAlpha(__global CLQuantum *p,
const unsigned int number_channels,
const CLQuantum value) { *(p+number_channels-1)=value; }
369 static inline CLQuantum getBlue(CLPixelType p) {
return p.x; }
370 static inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
371 static inline float getBlueF4(float4 p) {
return p.x; }
372 static inline void setBlueF4(float4* p,
float value) { (*p).x = value; }
374 static inline CLQuantum getGreen(CLPixelType p) {
return p.y; }
375 static inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
376 static inline float getGreenF4(float4 p) {
return p.y; }
377 static inline void setGreenF4(float4* p,
float value) { (*p).y = value; }
379 static inline CLQuantum getRed(CLPixelType p) {
return p.z; }
380 static inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
381 static inline float getRedF4(float4 p) {
return p.z; }
382 static inline void setRedF4(float4* p,
float value) { (*p).z = value; }
384 static inline CLQuantum getAlpha(CLPixelType p) {
return p.w; }
385 static inline void setAlpha(CLPixelType* p, CLQuantum value) { (*p).w = value; }
386 static inline float getAlphaF4(float4 p) {
return p.w; }
387 static inline void setAlphaF4(float4* p,
float value) { (*p).w = value; }
/*
  ReadChannels: loads channels of the pixel at p into *red, *green, *blue and
  *alpha.

  In the visible lines the green and blue reads follow a
  "number_channels > 2" test, and the alpha read follows a test that
  number_channels is 4 or 2 combined with a further condition.
  NOTE(review): the red read, the second half of the alpha condition and any
  per-channel mask tests are on lines missing from this excerpt.
*/
389 static inline void ReadChannels(
const __global CLQuantum *p,
const unsigned int number_channels,
390 const ChannelType channel,
float *red,
float *green,
float *blue,
float *alpha)
395 if (number_channels > 2)
398 *green=getPixelGreen(p);
401 *blue=getPixelBlue(p);
404 if (((number_channels == 4) || (number_channels == 2)) &&
406 *alpha=getPixelAlpha(p,number_channels);
/*
  ReadAllChannels: reads pixel (x,y) of image into a float4 laid out as
  x=red, y=green, z=blue, w=alpha.

  Red is always read.  Green and blue are read after a
  "number_channels > 2" test, and alpha is read when number_channels is 4
  or 2.  The declaration/initialisation of pixel and the return statement
  are on lines missing from this excerpt.
*/
409 static inline float4 ReadAllChannels(
const __global CLQuantum *image,
const unsigned int number_channels,
410 const unsigned int columns,
const unsigned int x,
const unsigned int y)
412 const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
416 pixel.x=getPixelRed(p);
418 if (number_channels > 2)
420 pixel.y=getPixelGreen(p);
421 pixel.z=getPixelBlue(p);
424 if ((number_channels == 4) || (number_channels == 2))
425 pixel.w=getPixelAlpha(p,number_channels);
/*
  ReadFloat4: locates pixel (x,y) with getPixelIndex, reads it through
  ReadChannels with the given channel mask, and returns the four values
  packed as (red, green, blue, alpha).  The declarations of the four
  temporaries are on lines missing from this excerpt.
*/
429 static inline float4 ReadFloat4(
const __global CLQuantum *image,
const unsigned int number_channels,
430 const unsigned int columns,
const unsigned int x,
const unsigned int y,
const ChannelType channel)
432 const __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
439 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
440 return (float4)(red, green, blue, alpha);
/*
  WriteChannels: writes red/green/blue/alpha values to the pixel at p.

  Only the guarding conditions are visible here: tests of the channel mask
  against RedChannel and GreenChannel, a "number_channels > 2" test, and a
  test that number_channels is 4 or 2 combined with a further condition.
  NOTE(review): the stores themselves are on lines missing from this
  excerpt.
*/
443 static inline void WriteChannels(__global CLQuantum *p,
const unsigned int number_channels,
444 const ChannelType channel,
float red,
float green,
float blue,
float alpha)
446 if ((channel & RedChannel) != 0)
449 if (number_channels > 2)
451 if ((channel & GreenChannel) != 0)
458 if (((number_channels == 4) || (number_channels == 2)) &&
/*
  WriteAllChannels: writes a float4 pixel to position (x,y) of image.

  The visible lines compute the destination pointer and test
  number_channels (> 2, and 4 or 2).
  NOTE(review): the stores are on lines missing from this excerpt.
*/
463 static inline void WriteAllChannels(__global CLQuantum *image,
const unsigned int number_channels,
464 const unsigned int columns,
const unsigned int x,
const unsigned int y, float4 pixel)
466 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
470 if (number_channels > 2)
476 if ((number_channels == 4) || (number_channels == 2))
/*
  WriteFloat4: locates pixel (x,y) with getPixelIndex and hands the
  components of pixel (x, y, z, w, in that order) to WriteChannels with the
  given channel mask.  The final parameter, pixel, is declared on a line
  missing from this excerpt.
*/
480 static inline void WriteFloat4(__global CLQuantum *image,
const unsigned int number_channels,
481 const unsigned int columns,
const unsigned int x,
const unsigned int y,
const ChannelType channel,
484 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
485 WriteChannels(p, number_channels, channel, pixel.x, pixel.y, pixel.z, pixel.w);
489 const unsigned int method,
float red,
float green,
float blue)
500 intensity=(red+green+blue)/3.0;
516 intensity=(float) (((
float) red*red+green*green+blue*blue)/
530 intensity=0.298839*red+0.586811*green+0.114350*blue;
543 intensity=0.298839*red+0.586811*green+0.114350*blue;
557 intensity=0.212656*red+0.715158*green+0.072186*blue;
570 intensity=0.212656*red+0.715158*green+0.072186*blue;
575 intensity=(float) (sqrt((
float) red*red+green*green+blue*blue)/
/*
  mirrorBottom: reflects a negative index about zero.

  Returns -value when value is negative and value unchanged otherwise, so
  index -1 maps to 1 and index 0 maps to itself.
*/
static inline int mirrorBottom(int value)
{
  return (value < 0) ? -value : value;
}
/*
  mirrorTop: reflects an index that runs past the upper edge back into
  [0, width).

  Returns 2*width-value-1 when value >= width and value unchanged otherwise,
  so index width maps to width-1 (the edge element is repeated).
*/
static inline int mirrorTop(int value, int width)
{
  return (value >= width) ? (2 * width - value - 1) : value;
}
/*
  MWC_AddMod64: modular addition helper taking operands a, b and modulus M.

  The visible test fires when v >= M, or when convert_float(v) is less than
  convert_float(a).
  NOTE(review): the definition of v is not visible; presumably v = a+b and
  the second comparison detects 64-bit wrap-around.  Comparing through float
  discards low-order bits of a 64-bit value, so operands that differ only in
  those bits compare equal -- confirm this is intentional.
*/
616 ulong MWC_AddMod64(ulong a, ulong b, ulong M)
620 if( (v>=M) || (convert_float(v) < convert_float(a)) )
/*
  MWC_MulMod64: modular multiplication helper built on MWC_AddMod64.

  The visible statements add b into the accumulator r and double b, both
  modulo M.  The loop control and the condition on the add are on lines
  missing from this excerpt.
*/
631 ulong MWC_MulMod64(ulong a, ulong b, ulong M)
636 r=MWC_AddMod64(r,b,M);
637 b=MWC_AddMod64(b,b,M);
/*
  MWC_PowMod64: modular exponentiation helper built on MWC_MulMod64.

  The visible statements multiply the accumulator acc by sqr and square sqr,
  both modulo M.  The loop control over the exponent e is on lines missing
  from this excerpt.
*/
647 ulong MWC_PowMod64(ulong a, ulong e, ulong M)
652 acc=MWC_MulMod64(acc,sqr,M);
653 sqr=MWC_MulMod64(sqr,sqr,M);
/*
  MWC_SkipImpl_Mod64: transforms a generator state by a power of A modulo M.

  Computes m = MWC_PowMod64(A, distance, M), packs the state as
  x = curr.x*A + curr.y, multiplies x by m modulo M, and returns the result
  split back into (x/A, x%A).
*/
659 uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
661 ulong m=MWC_PowMod64(A, distance, M);
662 ulong x=curr.x*(ulong)A+curr.y;
663 x=MWC_MulMod64(x, m, M);
664 return (uint2)((uint)(x/A), (uint)(x%A));
/*
  MWC_SeedImpl_Mod64: derives a per-work-item starting state.

  The stream distance is
    streamBase + (get_global_id(0)*vecSize + vecOffset)*streamGap.
  The constant MWC_BASEID is multiplied by A raised to that distance
  (modulo M), and the result is returned split into (x/A, x%A).
*/
667 uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
674 enum{ MWC_BASEID = 4077358422479273989UL };
676 ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
677 ulong m=MWC_PowMod64(A, dist, M);
679 ulong x=MWC_MulMod64(MWC_BASEID, m, M);
680 return (uint2)((uint)(x/A), (uint)(x%A));
/*
  Generator state: two 32-bit words x and c plus the values seed0 (32-bit)
  and seed1 (64-bit).  MWC64X_Skip and MWC64X_SeedStreams pass seed0 and
  seed1 as the A and M arguments of the Mod64 helpers.
*/
684 typedef struct{ uint x; uint c; uint seed0; ulong seed1; } mwc64x_state_t;
/*
  MWC64X_Step: computes the next state words.

  Xn is the low 32 bits of seed0*X + C, carry is 1 when that addition
  wrapped (Xn < C), and Cn is mad_hi(seed0, X, carry), i.e. the high half of
  the product plus the carry.
  NOTE(review): the definitions of X and C and the write-back into *s are on
  lines missing from this excerpt.
*/
686 void MWC64X_Step(mwc64x_state_t *s)
690 uint Xn=s->seed0*X+C;
691 uint carry=(uint)(Xn<C);
692 uint Cn=mad_hi(s->seed0,X,carry);
/*
  MWC64X_Skip: calls MWC_SkipImpl_Mod64 with the current (x, c) pair, seed0
  as A, seed1 as M and the requested distance.  Storing tmp back into *s is
  on lines missing from this excerpt.
*/
698 void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
700 uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->seed0, s->seed1, distance);
/*
  MWC64X_SeedStreams: calls MWC_SeedImpl_Mod64 with seed0 as A, seed1 as M,
  vecSize 1, vecOffset 0, and the given base and per-stream offsets.
  Storing tmp back into *s is on lines missing from this excerpt.
*/
705 void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
707 uint2 tmp=MWC_SeedImpl_Mod64(s->seed0, s->seed1, 1, 0, baseOffset, perStreamOffset);
/*
  MWC64X_NextUint: the output word is x XOR c of the current state.  The
  state advance and the return statement are on lines missing from this
  excerpt.
*/
713 uint MWC64X_NextUint(mwc64x_state_t *s)
715 uint res=s->x ^ s->c;
/*
  mwcReadPseudoRandomValue: draws the next 32-bit value from rng and scales
  it to a float by dividing by (float) 0xffffffff.
*/
724 float mwcReadPseudoRandomValue(mwc64x_state_t* rng)
726 return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
/*
  mwcGenerateDifferentialNoise: produces a noise-perturbed value for pixel
  using random draws from r.

  Visible fragments:
    - a first draw into alpha;
    - a second draw into beta followed by gamma = sqrt(-2 ln alpha) and the
      pair sigma = gamma*cos(2 pi beta), tau = gamma*sin(2 pi beta);
    - a branch guarded by alpha > MagickEpsilon that computes
      sqrt(-2 ln alpha) and uses cospi(2 beta)/2;
    - a loop that keeps drawing beta while alpha > poisson.
  NOTE(review): the noise_type dispatch, the use of attenuate and the return
  value are on lines missing from this excerpt; which fragment belongs to
  which noise type cannot be told from here.
*/
729 float mwcGenerateDifferentialNoise(mwc64x_state_t* r,
float pixel,
NoiseType noise_type,
float attenuate)
738 alpha=mwcReadPseudoRandomValue(r);
755 beta=mwcReadPseudoRandomValue(r);
756 gamma=sqrt(-2.0f*log(alpha));
757 sigma=gamma*cospi((2.0f*beta));
758 tau=gamma*sinpi((2.0f*beta));
794 if (alpha > MagickEpsilon)
795 sigma=sqrt(-2.0f*log(alpha));
796 beta=mwcReadPseudoRandomValue(r);
798 cospi((2.0f*beta))/2.0f);
807 for (i=0; alpha > poisson; i++)
809 beta=mwcReadPseudoRandomValue(r);
/*
  AddNoise: adds noise to the selected channels of image and writes the
  result to filteredImage.

  Each work-item seeds its generator with MWC64X_SeedStreams using a
  per-group base offset and a stream gap of
  pixelsPerWorkItem*numRandomNumbersPerPixel.  It then processes up to
  pixelsPerWorkItem pixels, starting at an element position derived from
  its group and local ids and advancing by get_local_size(0)*number_channels
  each iteration, stopping early once pos reaches length.

  For every pixel the channels are read, each channel selected by the mask
  (and present for the given number_channels) is passed through
  mwcGenerateDifferentialNoise, and the values are written back with
  WriteChannels.
  NOTE(review): the declaration of rng, its use of seed0/seed1 and the
  decrement of count are on lines missing from this excerpt.
*/
825 void AddNoise(
const __global CLQuantum *image,
826 const unsigned int number_channels,
const ChannelType channel,
827 const unsigned int length,
const unsigned int pixelsPerWorkItem,
828 const NoiseType noise_type,
const float attenuate,
const unsigned int seed0,
829 const unsigned int seed1,
const unsigned int numRandomNumbersPerPixel,
830 __global CLQuantum *filteredImage)
836 uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
837 uint offset = span * get_local_size(0) * get_group_id(0);
838 MWC64X_SeedStreams(&rng, offset, span);
840 uint pos = get_group_id(0) * get_local_size(0) * pixelsPerWorkItem * number_channels + (get_local_id(0) * number_channels);
841 uint count = pixelsPerWorkItem;
843 while (count > 0 && pos < length)
845 const __global CLQuantum *p = image + pos;
846 __global CLQuantum *q = filteredImage + pos;
853 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
855 if ((channel & RedChannel) != 0)
856 red=mwcGenerateDifferentialNoise(&rng,red,noise_type,attenuate);
858 if (number_channels > 2)
860 if ((channel & GreenChannel) != 0)
861 green=mwcGenerateDifferentialNoise(&rng,green,noise_type,attenuate);
863 if ((channel & BlueChannel) != 0)
864 blue=mwcGenerateDifferentialNoise(&rng,blue,noise_type,attenuate);
867 if (((number_channels == 4) || (number_channels == 2)) &&
868 ((channel & AlphaChannel) != 0))
869 alpha=mwcGenerateDifferentialNoise(&rng,alpha,noise_type,attenuate);
871 WriteChannels(q, number_channels, channel, red, green, blue, alpha);
873 pos += (get_local_size(0) * number_channels);
/*
  BlurRow: horizontal pass of a separable filter.

  Stage 1: the work-group loads wsize+width pixels of row y into the local
  buffer temp, starting radius = (width-1)/2 columns before the group's
  first column; every column index goes through ClampToCanvas.
  Stage 2: after the local-memory barrier, each work-item with
  get_global_id(0) < columns accumulates filter[i]*temp[i+local_id] for
  i in [0, width), with the bulk handled in runs of eight, and stores the
  float4 sum in tempImage[y*columns+x].
  NOTE(review): the declaration of i and its increment in the eight-wide
  loop are on lines missing from this excerpt.
*/
895 __kernel
void BlurRow(
const __global CLQuantum *image,
896 const unsigned int number_channels,
const ChannelType channel,
897 __constant
float *filter,
const unsigned int width,
898 const unsigned int imageColumns,
const unsigned int imageRows,
899 __local float4 *temp,__global float4 *tempImage)
901 const int x = get_global_id(0);
902 const int y = get_global_id(1);
904 const int columns = imageColumns;
906 const unsigned int radius = (width-1)/2;
907 const int wsize = get_local_size(0);
908 const unsigned int loadSize = wsize+width;
911 const int groupX=get_local_size(0)*get_group_id(0);
914 for (
int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
916 int cx = ClampToCanvas(i + groupX - radius, columns);
917 temp[i] = ReadFloat4(image, number_channels, columns, cx, y, channel);
921 barrier(CLK_LOCAL_MEM_FENCE);
924 if (get_global_id(0) < columns)
927 float4 result = (float4) 0;
931 for ( ; i+7 < width; )
933 for (
int j=0; j < 8; j++)
934 result+=filter[i+j]*temp[i+j+get_local_id(0)];
938 for ( ; i < width; i++)
939 result+=filter[i]*temp[i+get_local_id(0)];
942 tempImage[y*columns+x] = result;
/*
  BlurColumn: vertical pass of a separable filter over the float4 rows
  produced by the row pass.

  Stage 1: the work-group loads wsize+width entries of one column into the
  local buffer temp, starting radius = (width-1)/2 rows above the group's
  first row; every row index goes through ClampToCanvas.  The column read is
  groupX, the group's first column.
  Stage 2: after the local-memory barrier, each work-item with
  get_global_id(1) < rows accumulates filter[i]*temp[i+local_id(1)] for
  i in [0, width), with the bulk handled in runs of eight, and writes the
  result to filteredImage at (x,y) through WriteFloat4.
  NOTE(review): loading column groupX rather than x looks like it assumes a
  work-group that is one item wide in dimension 0 -- confirm against the
  host-side launch.  The declaration of i and its increment in the
  eight-wide loop are on lines missing from this excerpt.
*/
951 __kernel
void BlurColumn(
const __global float4 *blurRowData,
952 const unsigned int number_channels,
const ChannelType channel,
953 __constant
float *filter,
const unsigned int width,
954 const unsigned int imageColumns,
const unsigned int imageRows,
955 __local float4 *temp,__global CLQuantum *filteredImage)
957 const int x = get_global_id(0);
958 const int y = get_global_id(1);
960 const int columns = imageColumns;
961 const int rows = imageRows;
963 unsigned int radius = (width-1)/2;
964 const int wsize = get_local_size(1);
965 const unsigned int loadSize = wsize+width;
968 const int groupX=get_local_size(0)*get_group_id(0);
969 const int groupY=get_local_size(1)*get_group_id(1);
974 for (
int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
975 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
978 barrier(CLK_LOCAL_MEM_FENCE);
981 if (get_global_id(1) < rows)
984 float4 result = (float4) 0;
988 for ( ; i+7 < width; )
990 for (
int j=0; j < 8; j++)
991 result+=filter[i+j]*temp[i+j+get_local_id(1)];
995 for ( ; i < width; i++)
996 result+=filter[i]*temp[i+get_local_id(1)];
999 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
1026 float delta=tmax-tmin;
1028 result.y=delta/tmax;
1032 result.x =((pixel.x == tmax) ? 0.0f : ((pixel.y == tmax) ? 2.0f : 4.0f));
1033 result.x+=((pixel.x == tmax) ? (pixel.y-pixel.z) : ((pixel.y == tmax) ?
1034 (pixel.z-pixel.x) : (pixel.x-pixel.y)))/delta;
1036 result.x+=(result.x < 0.0f) ? 0.0f : 1.0f;
1045 float saturation=pixel.y;
1046 float brightness=pixel.z;
1048 float4 result=pixel;
1050 if (saturation == 0.0f)
1056 float h=6.0f*(hue-floor(hue));
1058 float p=brightness*(1.0f-saturation);
1059 float q=brightness*(1.0f-saturation*f);
1060 float t=brightness*(1.0f-(saturation*(1.0f-f)));
/*
  Contrast: in-place contrast adjustment, one work-item per pixel; the row
  length is taken from get_global_size(0).

  The pixel is read with ReadAllChannels; when fewer than three channels are
  present the first component is copied into the other two.  The value taken
  from pixel.z is moved towards a sine-shaped curve by
  0.5*sign*(0.5*(sinpi(b-0.5)+1)-b), clamped to [0,1], and the pixel is
  written back with WriteAllChannels.
  NOTE(review): the colour-space conversions around the brightness update
  and the store of brightness back into pixel are on lines missing from this
  excerpt.
*/
1103 __kernel
void Contrast(__global CLQuantum *image,
1104 const unsigned int number_channels,
const int sign)
1106 const int x=get_global_id(0);
1107 const int y=get_global_id(1);
1108 const unsigned int columns=get_global_size(0);
1110 float4 pixel=ReadAllChannels(image,number_channels,columns,x,y);
1111 if (number_channels < 3)
1112 pixel.y=pixel.z=pixel.x;
1115 float brightness=pixel.z;
1116 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1117 brightness=clamp(brightness,0.0f,1.0f);
1121 WriteAllChannels(image,number_channels,columns,x,y,pixel);
/*
  Histogram: one work-item per pixel; the row length is taken from
  get_global_size(0).

  Reads the red, green and blue components of im[c] as floats and atomically
  increments the third uint (offset +2) of histogram[pos].
  NOTE(review): the computation of pos from the colour values (and the use
  of colorspace/method) is on lines missing from this excerpt.
*/
1140 __kernel
void Histogram(__global CLPixelType * restrict im,
1142 const unsigned int colorspace,
1143 const unsigned int method,
1144 __global uint4 * restrict histogram)
1146 const int x = get_global_id(0);
1147 const int y = get_global_id(1);
1148 const int columns = get_global_size(0);
1149 const int c = x + y * columns;
1152 float red=(float)getRed(im[c]);
1153 float green=(float)getGreen(im[c]);
1154 float blue=(float)getBlue(im[c]);
1158 atomic_inc((__global uint *)(&(histogram[pos]))+2);
/*
  ContrastStretch: remaps each selected channel of every pixel through
  stretch_map, in place; the row length is taken from get_global_size(0).

  For each of red, green, blue and alpha: when the channel is selected in
  the mask and the white and black points for that channel differ, the
  channel value of oValue is converted to a map index with
  ScaleQuantumToMap and replaced by the same channel of the map entry.
  The pixel is stored back as (blue, green, red, alpha).
  NOTE(review): the channel parameter, the load of oValue and the initial
  values of red/green/blue/alpha are on lines missing from this excerpt.
*/
1171 __kernel
void ContrastStretch(__global CLPixelType * restrict im,
1173 __global CLPixelType * restrict stretch_map,
1174 const float4 white,
const float4 black)
1176 const int x = get_global_id(0);
1177 const int y = get_global_id(1);
1178 const int columns = get_global_size(0);
1179 const int c = x + y * columns;
1182 CLPixelType oValue, eValue;
1183 CLQuantum red, green, blue, alpha;
1188 if ((channel & RedChannel) != 0)
1190 if (getRedF4(white) != getRedF4(black))
1192 ePos = ScaleQuantumToMap(getRed(oValue));
1193 eValue = stretch_map[ePos];
1194 red = getRed(eValue);
1198 if ((channel & GreenChannel) != 0)
1200 if (getGreenF4(white) != getGreenF4(black))
1202 ePos = ScaleQuantumToMap(getGreen(oValue));
1203 eValue = stretch_map[ePos];
1204 green = getGreen(eValue);
1208 if ((channel & BlueChannel) != 0)
1210 if (getBlueF4(white) != getBlueF4(black))
1212 ePos = ScaleQuantumToMap(getBlue(oValue));
1213 eValue = stretch_map[ePos];
1214 blue = getBlue(eValue);
1218 if ((channel & AlphaChannel) != 0)
1220 if (getAlphaF4(white) != getAlphaF4(black))
1222 ePos = ScaleQuantumToMap(getAlpha(oValue));
1223 eValue = stretch_map[ePos];
1224 alpha = getAlpha(eValue);
1229 im[c]=(CLPixelType)(blue, green, red, alpha);
/*
  ConvolveOptimized: 2-D convolution that stages its inputs in local memory.

  Stage 1: the work-group copies a tile of
  (local_size + filter_size - 1) pixels per dimension into pixelLocalCache.
  The tile starts (filter_size-1)/2 pixels before the group's origin and
  every source coordinate goes through ClampToCanvas.  The filter
  coefficients are copied into filterCache the same way.
  Stage 2: after the local-memory barrier, work-items test their pixel
  against imageWidth/imageHeight and then walk the filter window over the
  cached tile.  Two paths exist: one taken when the alpha channel is not
  selected or matte is 0, and one that weights each tap by alpha
  (g = alpha*f) and finally scales sum.xyz by gamma.  The result is stored
  at output[imageIndex.y*imageWidth + imageIndex.x].
  NOTE(review): the accumulation statements, the index increments, the
  computation of alpha/gamma and the action taken for out-of-range pixels
  are on lines missing from this excerpt.
*/
1248 void ConvolveOptimized(
const __global CLPixelType *input, __global CLPixelType *output,
1249 const unsigned int imageWidth,
const unsigned int imageHeight,
1250 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1251 const uint matte,
const ChannelType channel, __local CLPixelType *pixelLocalCache, __local
float* filterCache) {
1254 blockID.x = get_global_id(0) / get_local_size(0);
1255 blockID.y = get_global_id(1) / get_local_size(1);
1259 imageAreaOrg.x = blockID.x * get_local_size(0);
1260 imageAreaOrg.y = blockID.y * get_local_size(1);
1262 int2 midFilterDimen;
1263 midFilterDimen.x = (filterWidth-1)/2;
1264 midFilterDimen.y = (filterHeight-1)/2;
1266 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1269 int2 cachedAreaDimen;
1270 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1271 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
1274 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1275 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1276 int groupSize = get_local_size(0) * get_local_size(1);
1277 for (
int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1279 int2 cachedAreaIndex;
1280 cachedAreaIndex.x = i % cachedAreaDimen.x;
1281 cachedAreaIndex.y = i / cachedAreaDimen.x;
1283 int2 imagePixelIndex;
1284 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
1288 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1289 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1291 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
1295 for (
int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1296 filterCache[i] = filter[i];
1298 barrier(CLK_LOCAL_MEM_FENCE);
1302 imageIndex.x = imageAreaOrg.x + get_local_id(0);
1303 imageIndex.y = imageAreaOrg.y + get_local_id(1);
1306 if (imageIndex.x >= imageWidth
1307 || imageIndex.y >= imageHeight) {
1311 int filterIndex = 0;
1312 float4 sum = (float4)0.0f;
1314 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1315 int cacheIndexY = get_local_id(1);
1316 for (
int j = 0; j < filterHeight; j++) {
1317 int cacheIndexX = get_local_id(0);
1318 for (
int i = 0; i < filterWidth; i++) {
1319 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1320 float f = filterCache[filterIndex];
1335 int cacheIndexY = get_local_id(1);
1336 for (
int j = 0; j < filterHeight; j++) {
1337 int cacheIndexX = get_local_id(0);
1338 for (
int i = 0; i < filterWidth; i++) {
1340 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1342 float f = filterCache[filterIndex];
1343 float g = alpha * f;
1357 sum.xyz = gamma*sum.xyz;
1359 CLPixelType outputPixel;
1365 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
/*
  Convolve: 2-D convolution reading directly from the input buffer, one
  work-item per output pixel.

  After a bounds test against imageWidth/imageHeight, the filter window is
  centred on the pixel (offset (filter_size-1)/2 per dimension) and every
  source coordinate goes through ClampToCanvas.  Two paths exist: one taken
  when the alpha channel is not selected or matte is 0, and one that weights
  each tap by alpha (g = alpha*f) and finally scales sum.xyz by gamma.  The
  result is stored at output[imageIndex.y*imageWidth + imageIndex.x].
  NOTE(review): the remaining parameters (matte, channel), the accumulation
  statements, filterIndex updates and the computation of alpha/gamma are on
  lines missing from this excerpt.
*/
1371 void Convolve(
const __global CLPixelType *input, __global CLPixelType *output,
1372 const uint imageWidth,
const uint imageHeight,
1373 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1377 imageIndex.x = get_global_id(0);
1378 imageIndex.y = get_global_id(1);
1384 if (imageIndex.x >= imageWidth
1385 || imageIndex.y >= imageHeight)
1388 int2 midFilterDimen;
1389 midFilterDimen.x = (filterWidth-1)/2;
1390 midFilterDimen.y = (filterHeight-1)/2;
1392 int filterIndex = 0;
1393 float4 sum = (float4)0.0f;
1395 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
1396 for (
int j = 0; j < filterHeight; j++) {
1397 int2 inputPixelIndex;
1398 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1399 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1400 for (
int i = 0; i < filterWidth; i++) {
1401 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1402 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1404 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1405 float f = filter[filterIndex];
1420 for (
int j = 0; j < filterHeight; j++) {
1421 int2 inputPixelIndex;
1422 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1423 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1424 for (
int i = 0; i < filterWidth; i++) {
1425 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1426 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1428 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1430 float f = filter[filterIndex];
1431 float g = alpha * f;
1445 sum.xyz = gamma*sum.xyz;
1448 CLPixelType outputPixel;
1454 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
/*
  HullPass1: first hull pass, one work-item per pixel.

  Reads the pixel v at (x,y) and a neighbour r at (x,y)+offset; the
  neighbour is taken from the image only when its coordinates are unchanged
  by ClampToCanvas (i.e. it lies inside the image).  Two four-component
  loops are visible: one raises sv[i] by ScaleCharToQuantum(1) when
  sr[i] >= sv[i]+ScaleCharToQuantum(2), the other lowers it by the same step
  when sr[i] <= sv[i]-ScaleCharToQuantum(2).  The components are then stored
  back into v and written to outputImage.
  NOTE(review): the out-of-image alternative for r, the setup of sv/sr, the
  selection between the two loops (presumably by polarity) and any
  matte-dependent handling of v.w are on lines missing from this excerpt.
*/
1472 __kernel
void HullPass1(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1473 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1474 ,
const int2 offset,
const int polarity,
const int matte) {
1476 int x = get_global_id(0);
1477 int y = get_global_id(1);
1479 CLPixelType v = inputImage[y*imageWidth+x];
1482 neighbor.y = y + offset.y;
1483 neighbor.x = x + offset.x;
1485 int2 clampedNeighbor;
1486 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1487 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1489 CLPixelType r = (clampedNeighbor.x == neighbor.x
1490 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1506 \n #pragma unroll 4\n
1507 for (
unsigned int i = 0; i < 4; i++) {
1508 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
1512 \n #pragma unroll 4\n
1513 for (
unsigned int i = 0; i < 4; i++) {
1514 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
1519 v.x = (CLQuantum)sv[0];
1520 v.y = (CLQuantum)sv[1];
1521 v.z = (CLQuantum)sv[2];
1524 v.w = (CLQuantum)sv[3];
1526 outputImage[y*imageWidth+x] = v;
/*
  HullPass2: second hull pass, one work-item per pixel.

  Reads the pixel v at (x,y) and two neighbours: r at (x,y)+offset and s at
  (x,y)-offset.  Each neighbour is taken from the image only when its
  coordinates are unchanged by ClampToCanvas.  Two four-component loops are
  visible: the first keeps sv[i] when ss[i] < sv[i]+ScaleCharToQuantum(2) or
  sr[i] <= sv[i], otherwise raises it by ScaleCharToQuantum(1); the second
  keeps sv[i] when ss[i] > sv[i]-ScaleCharToQuantum(2) or sr[i] >= sv[i],
  otherwise lowers it by the same step.  The components are then stored back
  into v and written to outputImage.
  NOTE(review): the out-of-image alternatives for r and s, the setup of
  sv/sr/ss, the selection between the two loops (presumably by polarity) and
  any matte-dependent handling of v.w are on lines missing from this
  excerpt.
*/
1535 __kernel
void HullPass2(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1536 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1537 ,
const int2 offset,
const int polarity,
const int matte) {
1539 int x = get_global_id(0);
1540 int y = get_global_id(1);
1542 CLPixelType v = inputImage[y*imageWidth+x];
1544 int2 neighbor, clampedNeighbor;
1546 neighbor.y = y + offset.y;
1547 neighbor.x = x + offset.x;
1548 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1549 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1551 CLPixelType r = (clampedNeighbor.x == neighbor.x
1552 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1556 neighbor.y = y - offset.y;
1557 neighbor.x = x - offset.x;
1558 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1559 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1561 CLPixelType s = (clampedNeighbor.x == neighbor.x
1562 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
1585 \n #pragma unroll 4\n
1586 for (
unsigned int i = 0; i < 4; i++) {
1591 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
1595 \n #pragma unroll 4\n
1596 for (
unsigned int i = 0; i < 4; i++) {
1600 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
1604 v.x = (CLQuantum)sv[0];
1605 v.y = (CLQuantum)sv[1];
1606 v.z = (CLQuantum)sv[2];
1609 v.w = (CLQuantum)sv[3];
1611 outputImage[y*imageWidth+x] = v;
/*
  Equalize: remaps every pixel through equalize_map, in place; the row
  length is taken from get_global_size(0).

  In the visible branch (channel mask has SyncChannels set and the red white
  and black points differ) each of red, green, blue and alpha is converted
  to a map index with ScaleQuantumToMap and replaced by the RED component of
  the corresponding map entry, i.e. one shared curve is applied to all four
  channels.  The pixel is stored back as (blue, green, red, alpha).
  NOTE(review): the channel parameter, the load of oValue, the initial
  values of red/green/blue/alpha and any non-synchronised branch are on
  lines missing from this excerpt.
*/
1631 __kernel
void Equalize(__global CLPixelType * restrict im,
1633 __global CLPixelType * restrict equalize_map,
1634 const float4 white,
const float4 black)
1636 const int x = get_global_id(0);
1637 const int y = get_global_id(1);
1638 const int columns = get_global_size(0);
1639 const int c = x + y * columns;
1642 CLPixelType oValue, eValue;
1643 CLQuantum red, green, blue, alpha;
1648 if ((channel & SyncChannels) != 0)
1650 if (getRedF4(white) != getRedF4(black))
1652 ePos = ScaleQuantumToMap(getRed(oValue));
1653 eValue = equalize_map[ePos];
1654 red = getRed(eValue);
1655 ePos = ScaleQuantumToMap(getGreen(oValue));
1656 eValue = equalize_map[ePos];
1657 green = getRed(eValue);
1658 ePos = ScaleQuantumToMap(getBlue(oValue));
1659 eValue = equalize_map[ePos];
1660 blue = getRed(eValue);
1661 ePos = ScaleQuantumToMap(getAlpha(oValue));
1662 eValue = equalize_map[ePos];
1663 alpha = getRed(eValue);
1666 im[c]=(CLPixelType)(blue, green, red, alpha);
1694 const unsigned int number_parameters,__constant
float *parameters)
1696 float result = 0.0f;
1702 for (
unsigned int i=0; i < number_parameters; i++)
1709 float freq,phase,ampl,bias;
1710 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1711 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
1712 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
1713 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1720 float width,range,center,bias;
1721 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1722 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1723 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1724 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1727 result = range/
MagickPI*asin(result)+bias;
1728 result = ( result <= -1.0f ) ? bias - range/2.0f : result;
1729 result = ( result >= 1.0f ) ? bias + range/2.0f : result;
1735 float slope,range,center,bias;
1736 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1737 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1738 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1739 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
/*
  ComputeFunction: applies ApplyFunction to the selected channels of every
  pixel, in place; the row length is taken from get_global_size(0).

  The pixel is read with ReadChannels.  Red is processed when selected in
  the mask; green and blue when selected and number_channels > 2; alpha when
  selected and number_channels is 4 or 2.  The values are written back with
  WriteChannels.
  NOTE(review): the channel, function and number_parameters parameters and
  the declarations of the four temporaries are on lines missing from this
  excerpt.
*/
1759 __kernel
void ComputeFunction(__global CLQuantum *image,
const unsigned int number_channels,
1761 __constant
float *parameters)
1763 const unsigned int x = get_global_id(0);
1764 const unsigned int y = get_global_id(1);
1765 const unsigned int columns = get_global_size(0);
1766 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1773 ReadChannels(p, number_channels, channel, &red, &green, &blue, &alpha);
1775 if ((channel & RedChannel) != 0)
1776 red=
ApplyFunction(red,
function, number_parameters, parameters);
1778 if (number_channels > 2)
1780 if ((channel & GreenChannel) != 0)
1781 green=
ApplyFunction(green,
function, number_parameters, parameters);
1783 if ((channel & BlueChannel) != 0)
1784 blue=
ApplyFunction(blue,
function, number_parameters, parameters);
1787 if (((number_channels == 4) || (number_channels == 2)) &&
1788 ((channel & AlphaChannel) != 0))
1789 alpha=
ApplyFunction(alpha,
function, number_parameters, parameters);
1791 WriteChannels(p, number_channels, channel, red, green, blue, alpha);
/*
  Grayscale: replaces the red, green and blue channels of every pixel with a
  single intensity value, in place; the row length is taken from
  get_global_size(0).
  NOTE(review): the red read and the computation of intensity (presumably
  from method/colorspace) are on lines missing from this excerpt.
  number_channels is a signed int here but is passed to getPixelIndex, which
  takes unsigned int.
*/
1808 __kernel
void Grayscale(__global CLQuantum *image,
const int number_channels,
1809 const unsigned int colorspace,
const unsigned int method)
1811 const unsigned int x = get_global_id(0);
1812 const unsigned int y = get_global_id(1);
1813 const unsigned int columns = get_global_size(0);
1814 __global CLQuantum *p = image + getPixelIndex(number_channels, columns, x, y);
1822 green=getPixelGreen(p);
1823 blue=getPixelBlue(p);
1827 setPixelRed(p,intensity);
1828 setPixelGreen(p,intensity);
1829 setPixelBlue(p,intensity);
/*
  LocalContrastBlurRow: horizontal weighted blur of a luma value, written to
  tmpImage.

  Each work-item handles row y = get_global_id(1) and the columns
  i = local_id(0), local_id(0)+local_size(0), ... below imageWidth.  For
  each column it sums dot(RGB, pixel) over a window whose samples are
  reflected at the image edges by mirrorBottom/mirrorTop.  The visible
  weights step by +1 per sample on the leading side and -1 on the trailing
  side, with the bulk handled in runs of eight.  The sum is divided by
  (radius+1)^2.
  NOTE(review): the radius parameter, the initial value of j, the updates of
  weight/j and the declaration of sum are on lines missing from this
  excerpt.
*/
1847 __kernel
void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *tmpImage,
1849 const int imageWidth,
1850 const int imageHeight)
1852 const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
1854 int x = get_local_id(0);
1855 int y = get_global_id(1);
1857 if ((x >= imageWidth) || (y >= imageHeight))
1860 global CLPixelType *src = srcImage + y * imageWidth;
1862 for (
int i = x; i < imageWidth; i += get_local_size(0)) {
1864 float weight = 1.0f;
1867 while ((j + 7) < i) {
1868 for (
int k = 0; k < 8; ++k)
1869 sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
1874 sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
1879 while ((j + 7) < radius + i) {
1880 for (
int k = 0; k < 8; ++k)
1881 sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
1885 while (j < radius + i) {
1886 sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
1891 tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
/*
  LocalContrastBlurApplyColumn: vertical weighted blur of blurImage followed
  by the contrast adjustment of the source pixel, one work-item per pixel.

  The column sum walks blurImage with stride imageWidth, reflecting row
  indices at the edges with mirrorBottom/mirrorTop and using the same
  rising/falling weights as the row pass.  The pixel's luma srcVal is then
  compared with the blurred value sum/(radius+1)^2; the difference, scaled
  by strength/100, is turned into the multiplier
  mult = (srcVal + difference) / srcVal.  The pixel is stored to dstImage.
  NOTE(review): mult divides by srcVal, which is 0 for a black pixel; unless
  a line missing from this excerpt guards it, mult is then non-finite --
  confirm.  The radius parameter, the updates of weight/j and the
  application of mult to pixel are also on missing lines.
*/
1897 __kernel
void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *blurImage,
1899 const float strength,
1900 const int imageWidth,
1901 const int imageHeight)
1903 const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
1905 int x = get_global_id(0);
1906 int y = get_global_id(1);
1908 if ((x >= imageWidth) || (y >= imageHeight))
1911 global
float *src = blurImage + x;
1914 float weight = 1.0f;
1917 while ((j + 7) < y) {
1918 for (
int k = 0; k < 8; ++k)
1919 sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
1924 sum += weight * src[mirrorBottom(j) * imageWidth];
1929 while ((j + 7) < radius + y) {
1930 for (
int k = 0; k < 8; ++k)
1931 sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
1935 while (j < radius + y) {
1936 sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
1941 CLPixelType pixel = srcImage[x + y * imageWidth];
1942 float srcVal = dot(RGB, convert_float4(pixel));
1943 float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
1944 mult = (srcVal + mult) / srcVal;
1950 dstImage[x + y * imageWidth] = pixel;
/*
  ConvertRGBToHSL: converts a red/green/blue triple to hue, saturation and
  lightness, returned through the three pointers.

  Lightness is (tmax+tmin)/2.  Saturation is c/(2*L) when L <= 0.5 and
  c/(2-2*L) otherwise.
  NOTE(review): the definitions of tmax, tmin and c and the hue computation
  are on lines missing from this excerpt.
*/
1968 static inline void ConvertRGBToHSL(
const CLQuantum red,
const CLQuantum green,
const CLQuantum blue,
1969 float *hue,
float *saturation,
float *lightness)
1984 *lightness=(tmax+tmin)/2.0;
2005 if (*lightness <= 0.5)
2006 *saturation=c/(2.0*(*lightness));
2008 *saturation=c/(2.0-2.0*(*lightness));
/*
  ConvertHSLToRGB: converts hue, saturation and lightness to red/green/blue,
  returned through the three pointers.

  c is 2*L*S when L <= 0.5 and (2-2*L)*S otherwise, and tmin = L - c/2.
  h is reduced into [0,360) and x = c*(1-|h mod 2 - 1|); the switch then
  selects on floor(h) mod 6.
  NOTE(review): the initial scaling of h from hue and the switch cases that
  assign the outputs are on lines missing from this excerpt.
*/
2011 static inline void ConvertHSLToRGB(
const float hue,
const float saturation,
const float lightness,
2012 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2027 if (lightness <= 0.5)
2028 c=2.0*lightness*saturation;
2030 c=(2.0-2.0*lightness)*saturation;
2031 tmin=lightness-0.5*c;
2032 h-=360.0*floor(h/360.0);
2034 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
2035 switch ((
int) floor(h) % 6)
/*
  ModulateHSL: adjusts a colour in HSL terms, in place through the three
  pointers.

  Hue is shifted by 0.5*(percent_hue/100 - 1); saturation and lightness are
  multiplied by percent_saturation/100 and percent_lightness/100, so 100
  leaves each component unchanged.
  NOTE(review): the conversions to and from HSL and any wrap-around of hue
  are on lines missing from this excerpt.
*/
2091 static inline void ModulateHSL(
const float percent_hue,
const float percent_saturation,
const float percent_lightness,
2092 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2103 hue+=0.5*(0.01*percent_hue-1.0);
2108 saturation*=0.01*percent_saturation;
2109 lightness*=0.01*percent_lightness;
/*
  Modulate: applies ModulateHSL to every pixel, in place; the row length is
  taken from get_global_size(0).

  percent_brightness is passed as the lightness percentage.  The result is
  assembled with setRed/setGreen/setBlue and the original .w component is
  copied through unchanged.
  NOTE(review): the red read and any dispatch on colorspace are on lines
  missing from this excerpt.
*/
2113 __kernel
void Modulate(__global CLPixelType *im,
2114 const float percent_brightness,
2115 const float percent_hue,
2116 const float percent_saturation,
2117 const int colorspace)
2120 const int x = get_global_id(0);
2121 const int y = get_global_id(1);
2122 const int columns = get_global_size(0);
2123 const int c = x + y * columns;
2125 CLPixelType pixel = im[c];
2133 green=getGreen(pixel);
2134 blue=getBlue(pixel);
2141 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
2142 &red, &green, &blue);
2147 CLPixelType filteredPixel;
2149 setRed(&filteredPixel, red);
2150 setGreen(&filteredPixel, green);
2151 setBlue(&filteredPixel, blue);
2152 filteredPixel.w = pixel.w;
2154 im[c] = filteredPixel;
/*
  MotionBlur: accumulates width samples taken along a list of pixel offsets,
  one work-item per output pixel.

  After a bounds test against imageWidth/imageHeight the accumulator is
  initialised from bias.  For each tap i the sample position is
  currentPixel + offset[i], clamped with ClampToCanvas.  Two paths exist:
  when the alpha channel is not selected or matte is 0, every component is
  weighted by filter[i]; otherwise every component is weighted by
  filter[i]*alpha and pixel.xyz is finally scaled by gamma.  The result is
  stored at output[currentPixel.y*imageWidth + currentPixel.x].
  NOTE(review): the bias parameter, the computation of alpha/gamma and the
  conversion of pixel into outputPixel are on lines missing from this
  excerpt.
*/
2172 void MotionBlur(
const __global CLPixelType *input, __global CLPixelType *output,
2173 const unsigned int imageWidth,
const unsigned int imageHeight,
2174 const __global
float *filter,
const unsigned int width,
const __global int2* offset,
2176 const ChannelType channel,
const unsigned int matte) {
2179 currentPixel.x = get_global_id(0);
2180 currentPixel.y = get_global_id(1);
2182 if (currentPixel.x >= imageWidth
2183 || currentPixel.y >= imageHeight)
2187 pixel.x = (float)bias.x;
2188 pixel.y = (
float)bias.y;
2189 pixel.z = (float)bias.z;
2190 pixel.w = (
float)bias.w;
2192 if (((channel & AlphaChannel) == 0) || (matte == 0)) {
2194 for (
int i = 0; i < width; i++) {
2197 int2 samplePixel = currentPixel + offset[i];
2198 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2199 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2200 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2202 pixel.x += (filter[i] * (float)samplePixelValue.x);
2203 pixel.y += (filter[i] * (float)samplePixelValue.y);
2204 pixel.z += (filter[i] * (float)samplePixelValue.z);
2205 pixel.w += (filter[i] * (float)samplePixelValue.w);
2208 CLPixelType outputPixel;
2213 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2218 for (
int i = 0; i < width; i++) {
2221 int2 samplePixel = currentPixel + offset[i];
2222 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2223 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2225 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2228 float k = filter[i];
2229 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2230 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2231 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
2233 pixel.w += k * alpha * samplePixelValue.w;
2238 pixel.xyz = gamma*pixel.xyz;
2240 CLPixelType outputPixel;
2245 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
2264 float BoxResizeFilter(
const float x)
/*
  CubicBC: evaluates a piecewise cubic in x from the seven coefficients
  resizeFilterCoefficients[0..6].

  NOTE(review): the conditions that select between the two returns (and any
  fall-through result) are not visible in this listing.
*/
2272 float CubicBC(
const float x,
const __global
float* resizeFilterCoefficients)
/* Piece built from coefficients 0..2: c0 + x*x*(c1 + x*c2). */
2304 return(resizeFilterCoefficients[0]+x*(x*
2305 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
/* Piece built from coefficients 3..6: c3 + x*(c4 + x*(c5 + x*c6)). */
2307 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2308 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
/*
  Sinc: returns sinpi(x) / (MagickPI*x).

  NOTE(review): `alpha` is zero when x == 0; any guard for that case sits on
  a line that is not visible in this listing -- confirm one exists.
*/
2314 float Sinc(
const float x)
2318 const float alpha=(float) (
MagickPI*x);
2319 return sinpi(x)/alpha;
2333 return ((x<1.0f)?(1.0f-x):0.0f);
/*
  Hann: returns 0.5 + 0.5*cos(MagickPI*x).
*/
2339 float Hann(
const float x)
2345 const float cosine=cos((
MagickPI*x));
2346 return(0.5f+0.5f*cosine);
2357 const float cosine=cos((
MagickPI*x));
2358 return(0.54f+0.46f*cosine);
2372 const float cosine=cos((
MagickPI*x));
2373 return(0.34f+cosine*(0.5f+cosine*0.16f));
/*
  applyResizeFilter: returns the value of a resize weighting function at x.

  NOTE(review): the dispatch on filterType and most of its cases are not
  visible in this listing; only the two returns below are shown.  CubicBC is
  the only visible callee that receives filterCoefficients.
*/
2378 static inline float applyResizeFilter(
const float x,
const ResizeWeightingFunctionType filterType,
const __global
float* filterCoefficients)
2388 return CubicBC(x,filterCoefficients);
2390 return BoxResizeFilter(x);
/*
  getResizeFilterWeight: combines two applyResizeFilter evaluations into one
  weight: a windowing term evaluated with resizeWindowType at xBlur*scale,
  multiplied by the filter term evaluated with resizeFilterType at xBlur,
  where xBlur = fabs(x/resizeFilterBlur).

  NOTE(review): `resizeWindowType` is used below but its parameter
  declaration is not visible in this listing, and neither are the body of
  the resizeWindowSupport test or the return statement.  No guard against
  resizeFilterBlur == 0 is visible.
*/
2408 static inline float getResizeFilterWeight(
const __global
float* resizeFilterCubicCoefficients,
const ResizeWeightingFunctionType resizeFilterType
2410 ,
const float resizeFilterScale,
const float resizeWindowSupport,
const float resizeFilterBlur,
const float x)
2413 float xBlur = fabs(x/resizeFilterBlur);
/* Special case for a (near) zero window support; its body is not shown. */
2414 if (resizeWindowSupport < MagickEpsilon
/* Window term: evaluate the window function at the scaled position. */
2421 scale = resizeFilterScale;
2422 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
/* Final weight = window term * filter term. */
2424 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
2431 const char *accelerateKernels2 =
/*
  getNumWorkItemsPerPixel: returns numWorkItems / pixelPerWorkgroup using
  unsigned integer division (the remainder is discarded).

  NOTE(review): no guard against pixelPerWorkgroup == 0 is visible, and the
  result is 0 whenever pixelPerWorkgroup > numWorkItems.
*/
2435 static inline unsigned int getNumWorkItemsPerPixel(
const unsigned int pixelPerWorkgroup,
const unsigned int numWorkItems) {
2436 return (numWorkItems/pixelPerWorkgroup);
/*
  pixelToCompute: maps a work-item id to the index of the pixel it works on.
  The index is itemID / (work-items per pixel); it is replaced by -1 when it
  is not below pixelPerWorkgroup, and the resize kernels in this file test
  the result against -1.

  NOTE(review): the return statement is not visible in this listing.  The
  division is by numWorkItemsPerPixel, which is 0 when pixelPerWorkgroup
  exceeds numWorkItems -- presumably the host prevents that; confirm.
*/
2441 static inline int pixelToCompute(
const unsigned itemID,
const unsigned int pixelPerWorkgroup,
const unsigned int numWorkItems) {
2442 const unsigned int numWorkItemsPerPixel = getNumWorkItemsPerPixel(pixelPerWorkgroup, numWorkItems);
2443 int pixelIndex = itemID/numWorkItemsPerPixel;
2444 pixelIndex = (pixelIndex<pixelPerWorkgroup)?pixelIndex:-1;
/*
  ResizeHorizontalFilter: horizontal resampling pass, declared with a
  required work-group size of (256, 1, 1).

  Each work-group handles the output columns [startX, stopX) of row
  y = get_global_id(1).  The span of input pixels it needs is first copied
  into the __local inputImageCache, then the output pixels are processed in
  chunks of pixelChunkSize.  Within a chunk several work-items share one
  output pixel: each accumulates a partial weighted sum over a slice of the
  filter taps, and the partial sums are combined through the __local arrays
  outputPixelCache, densityCache and gammaCache.

  NOTE(review): this listing omits some source lines (braces and the
  declarations/updates of `bisect`, `alpha`, `gamma` and the density
  accumulation), so the comments describe only the visible statements.
*/
2451 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2452 void ResizeHorizontalFilter(
const __global CLQuantum *inputImage,
const unsigned int number_channels,
2453 const unsigned int inputColumns,
const unsigned int inputRows, __global CLQuantum *filteredImage,
2454 const unsigned int filteredColumns,
const unsigned int filteredRows,
const float xFactor,
2455 const int resizeFilterType,
const int resizeWindowType,
const __global
float *resizeFilterCubicCoefficients,
2456 const float resizeFilterScale,
const float resizeFilterSupport,
const float resizeFilterWindowSupport,
2457 const float resizeFilterBlur, __local CLQuantum *inputImageCache,
const int numCachedPixels,
2458 const unsigned int pixelPerWorkgroup,
const unsigned int pixelChunkSize,
2459 __local float4 *outputPixelCache, __local
float *densityCache, __local
float *gammaCache)
/* Output columns owned by this work-group, clipped to the output width. */
2462 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2463 const unsigned int stopX =
MagickMin(startX + pixelPerWorkgroup,filteredColumns);
2464 const unsigned int actualNumPixelToCompute = stopX - startX;
/* Filter support in input pixels; scale is never below 1. */
2467 float scale =
MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
2468 const float support =
MagickMax(scale*resizeFilterSupport,0.5f);
/* Input column range to cache, clipped to [0, inputColumns). */
2471 const int cacheRangeStartX =
MagickMax((
int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(
int)(0));
2472 const int cacheRangeEndX =
MagickMin((
int)(cacheRangeStartX + numCachedPixels), (
int)inputColumns);
/* Copy the interleaved channel data of that range of row y into local
   memory and wait for the copy to finish. */
2475 const unsigned int y = get_global_id(1);
2476 const unsigned int pos = getPixelIndex(number_channels, inputColumns, cacheRangeStartX, y);
2477 const unsigned int num_elements = (cacheRangeEndX - cacheRangeStartX) * number_channels;
2478 event_t e = async_work_group_copy(inputImageCache, inputImage + pos, num_elements, 0);
2479 wait_group_events(1, &e);
/* alpha_index is the last channel when there are 2 or 4 channels, else 0;
   a value of 0 is used below to mean "no alpha channel". */
2481 unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
2482 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2483 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2485 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2486 const unsigned int chunkStopX =
MagickMin(chunkStartX + pixelChunkSize, stopX);
2487 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
/* numItems = work-items cooperating on one output pixel of this chunk;
   pixelIndex = which pixel of the chunk this work-item serves (-1: none). */
2490 const unsigned int itemID = get_local_id(0);
2491 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2493 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2495 float4 filteredPixel = (float4)0.0f;
2496 float density = 0.0f;
2499 if (pixelIndex != -1)
2502 const int x = chunkStartX + pixelIndex;
/* Tap range [start, stop) around `bisect`, clipped to the input row. */
2506 const unsigned int start = (
unsigned int)
MagickMax(bisect-support+0.5f,0.0f);
2507 const unsigned int stop = (
unsigned int)
MagickMin(bisect+support+0.5f,(
float)inputColumns);
2508 const unsigned int n = stop - start;
/* Split the n taps evenly (rounding up) between the numItems work-items.
   NOTE(review): divides by numItems, which is 0 if the chunk holds more
   pixels than the work-group has work-items -- presumably excluded by the
   host-side choice of pixelChunkSize; confirm. */
2511 unsigned int numStepsPerWorkItem = n / numItems;
2512 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2514 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2517 const unsigned int stopStep =
MagickMin(startStep+numStepsPerWorkItem, n);
/* cacheIndex is the tap position relative to the start of the cached span. */
2519 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
2520 for (
unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
/* NOTE(review): 0.5 here is a double literal, unlike the 0.5f used elsewhere. */
2522 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2525 resizeFilterScale, resizeFilterWindowSupport,
2526 resizeFilterBlur, scale*(start + i - bisect + 0.5));
/* Read the cached pixel: channel 0 always, channels 1-2 when there are more
   than two channels, and the alpha channel when present. */
2528 float4 cp = (float4)0.0f;
2530 __local CLQuantum *p = inputImageCache + (cacheIndex*number_channels);
2531 cp.x = (float) *(p);
2532 if (number_channels > 2)
2534 cp.y = (float) *(p + 1);
2535 cp.z = (float) *(p + 2);
2538 if (alpha_index != 0)
2540 cp.w = (float) *(p + alpha_index);
/* With alpha: colour is accumulated with `alpha` (computed on a hidden
   line) and the alpha channel with `weight`. */
2544 filteredPixel.x += alpha * cp.x;
2545 filteredPixel.y += alpha * cp.y;
2546 filteredPixel.z += alpha * cp.z;
2547 filteredPixel.w += weight * cp.w;
/* Without alpha: all components are accumulated with `weight`. */
2551 filteredPixel += ((float4) weight)*cp;
/* Clear the per-pixel reduction slots before combining partial sums. */
2559 if (itemID < actualNumPixelInThisChunk) {
2560 outputPixelCache[itemID] = (float4)0.0f;
2561 densityCache[itemID] = 0.0f;
2562 if (alpha_index != 0)
2563 gammaCache[itemID] = 0.0f;
2565 barrier(CLK_LOCAL_MEM_FENCE);
/* Reduction in numItems rounds: in round i only the work-items with
   itemID%numItems == i add their partial sums to their pixel's slot. */
2568 for (
unsigned int i = 0; i < numItems; i++) {
2569 if (pixelIndex != -1) {
2570 if (itemID%numItems == i) {
2571 outputPixelCache[pixelIndex]+=filteredPixel;
2572 densityCache[pixelIndex]+=density;
2573 if (alpha_index != 0)
2574 gammaCache[pixelIndex]+=gamma;
2577 barrier(CLK_LOCAL_MEM_FENCE);
/* One work-item per output pixel reads the combined sums, applies the
   density and gamma corrections and writes the pixel. */
2580 if (itemID < actualNumPixelInThisChunk)
2582 float4 filteredPixel = outputPixelCache[itemID];
2585 if (alpha_index != 0)
2586 gamma = gammaCache[itemID];
2588 float density = densityCache[itemID];
2589 if ((density != 0.0f) && (density != 1.0f))
2592 filteredPixel *= (float4) density;
2593 if (alpha_index != 0)
2597 if (alpha_index != 0)
2600 filteredPixel.x *= gamma;
2601 filteredPixel.y *= gamma;
2602 filteredPixel.z *= gamma;
2605 WriteAllChannels(filteredImage, number_channels, filteredColumns, chunkStartX + itemID, y, filteredPixel);
/*
  ResizeVerticalFilter: vertical resampling pass, declared with a required
  work-group size of (1, 256, 1).

  Each work-group handles the output rows [startY, stopY) of column
  x = get_global_id(0).  The needed input rows of that column are copied
  into the __local inputImageCache one channel at a time (strided copies),
  so the cache holds rangeLength values of channel 0, then rangeLength
  values of channel 1, and so on.  Output pixels are processed in chunks of
  pixelChunkSize; several work-items share one output pixel and their
  partial sums are combined through outputPixelCache, densityCache and
  gammaCache.

  NOTE(review): this listing omits some source lines (braces and the
  declarations/updates of `bisect`, `alpha`, `gamma` and the density
  accumulation), so the comments describe only the visible statements.
*/
2613 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2614 void ResizeVerticalFilter(const __global CLQuantum *inputImage, const
unsigned int number_channels,
2615 const
unsigned int inputColumns, const
unsigned int inputRows, __global CLQuantum *filteredImage,
2616 const
unsigned int filteredColumns, const
unsigned int filteredRows, const
float yFactor,
2617 const
int resizeFilterType, const
int resizeWindowType, const __global
float *resizeFilterCubicCoefficients,
2618 const
float resizeFilterScale, const
float resizeFilterSupport, const
float resizeFilterWindowSupport,
2619 const
float resizeFilterBlur, __local CLQuantum *inputImageCache, const
int numCachedPixels,
2620 const
unsigned int pixelPerWorkgroup, const
unsigned int pixelChunkSize,
2621 __local float4 *outputPixelCache, __local
float *densityCache, __local
float *gammaCache)
/* Output rows owned by this work-group, clipped to the output height. */
2624 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2625 const unsigned int stopY =
MagickMin(startY + pixelPerWorkgroup,filteredRows);
2626 const unsigned int actualNumPixelToCompute = stopY - startY;
/* Filter support in input pixels; scale is never below 1. */
2629 float scale =
MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
2630 const float support =
MagickMax(scale*resizeFilterSupport,0.5f);
/* Input row range to cache, clipped to [0, inputRows). */
2633 const int cacheRangeStartY =
MagickMax((
int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(
int)(0));
2634 const int cacheRangeEndY =
MagickMin((
int)(cacheRangeStartY + numCachedPixels), (
int)inputRows);
/* One strided copy per channel: consecutive elements are one input row
   apart (stride = inputColumns * number_channels), and channel i lands at
   inputImageCache + rangeLength*i. */
2637 const unsigned int x = get_global_id(0);
2638 unsigned int pos = getPixelIndex(number_channels, inputColumns, x, cacheRangeStartY);
2639 unsigned int rangeLength = cacheRangeEndY-cacheRangeStartY;
2640 unsigned int stride = inputColumns * number_channels;
2641 for (
unsigned int i = 0; i < number_channels; i++)
2643 event_t e = async_work_group_strided_copy(inputImageCache + (rangeLength*i), inputImage+pos+i, rangeLength, stride, 0);
2644 wait_group_events(1,&e);
/* alpha_index is the last channel when there are 2 or 4 channels, else 0;
   a value of 0 is used below to mean "no alpha channel". */
2647 unsigned int alpha_index = (number_channels == 4) || (number_channels == 2) ? number_channels - 1 : 0;
2648 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2649 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2651 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2652 const unsigned int chunkStopY =
MagickMin(chunkStartY + pixelChunkSize, stopY);
2653 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
/* numItems = work-items cooperating on one output pixel of this chunk;
   pixelIndex = which pixel of the chunk this work-item serves (-1: none). */
2656 const unsigned int itemID = get_local_id(1);
2657 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2659 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2661 float4 filteredPixel = (float4)0.0f;
2662 float density = 0.0f;
2665 if (pixelIndex != -1)
2668 const int y = chunkStartY + pixelIndex;
/* Tap range [start, stop) around `bisect`, clipped to the input column. */
2672 const unsigned int start = (
unsigned int)
MagickMax(bisect-support+0.5f,0.0f);
2673 const unsigned int stop = (
unsigned int)
MagickMin(bisect+support+0.5f,(
float)inputRows);
2674 const unsigned int n = stop - start;
/* Split the n taps evenly (rounding up) between the numItems work-items.
   NOTE(review): divides by numItems, which is 0 if the chunk holds more
   pixels than the work-group has work-items -- presumably excluded by the
   host-side choice of pixelChunkSize; confirm. */
2677 unsigned int numStepsPerWorkItem = n / numItems;
2678 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
2680 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2683 const unsigned int stopStep =
MagickMin(startStep+numStepsPerWorkItem, n);
/* cacheIndex is the tap position relative to the start of the cached span. */
2685 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
2686 for (
unsigned int i = startStep; i < stopStep; i++, cacheIndex++)
/* NOTE(review): 0.5 here is a double literal, unlike the 0.5f used elsewhere. */
2688 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,
2691 resizeFilterScale, resizeFilterWindowSupport,
2692 resizeFilterBlur, scale*(start + i - bisect + 0.5));
/* Read the cached pixel; channel k of a tap lives rangeLength*k elements
   after channel 0 of the same tap. */
2694 float4 cp = (float4)0.0f;
2696 __local CLQuantum *p = inputImageCache + cacheIndex;
2697 cp.x = (float) *(p);
2698 if (number_channels > 2)
2700 cp.y = (float) *(p + rangeLength);
2701 cp.z = (float) *(p + (rangeLength * 2));
2704 if (alpha_index != 0)
2706 cp.w = (float) *(p + (rangeLength * alpha_index));
/* With alpha: colour is accumulated with `alpha` (computed on a hidden
   line) and the alpha channel with `weight`. */
2710 filteredPixel.x += alpha * cp.x;
2711 filteredPixel.y += alpha * cp.y;
2712 filteredPixel.z += alpha * cp.z;
2713 filteredPixel.w += weight * cp.w;
/* Without alpha: all components are accumulated with `weight`. */
2717 filteredPixel += ((float4) weight)*cp;
/* Clear the per-pixel reduction slots before combining partial sums. */
2725 if (itemID < actualNumPixelInThisChunk) {
2726 outputPixelCache[itemID] = (float4)0.0f;
2727 densityCache[itemID] = 0.0f;
2728 if (alpha_index != 0)
2729 gammaCache[itemID] = 0.0f;
2731 barrier(CLK_LOCAL_MEM_FENCE);
/* Reduction in numItems rounds: in round i only the work-items with
   itemID%numItems == i add their partial sums to their pixel's slot. */
2734 for (
unsigned int i = 0; i < numItems; i++) {
2735 if (pixelIndex != -1) {
2736 if (itemID%numItems == i) {
2737 outputPixelCache[pixelIndex]+=filteredPixel;
2738 densityCache[pixelIndex]+=density;
2739 if (alpha_index != 0)
2740 gammaCache[pixelIndex]+=gamma;
2743 barrier(CLK_LOCAL_MEM_FENCE);
/* One work-item per output pixel reads the combined sums, applies the
   density and gamma corrections and writes the pixel. */
2746 if (itemID < actualNumPixelInThisChunk)
2748 float4 filteredPixel = outputPixelCache[itemID];
2751 if (alpha_index != 0)
2752 gamma = gammaCache[itemID];
2754 float density = densityCache[itemID];
2755 if ((density != 0.0f) && (density != 1.0f))
2758 filteredPixel *= (float4) density;
2759 if (alpha_index != 0)
2763 if (alpha_index != 0)
2766 filteredPixel.x *= gamma;
2767 filteredPixel.y *= gamma;
2768 filteredPixel.z *= gamma;
2771 WriteAllChannels(filteredImage, number_channels, filteredColumns, x, chunkStartY + itemID, filteredPixel);
/*
  RotationalBlur: each work-item computes the output pixel at
  (get_global_id(0), get_global_id(1)) by averaging input samples taken at
  positions obtained by rotating the pixel about blurCenter.  The rotation
  angles come from the cos_theta / sin_theta tables (cossin_theta_size
  entries each).  The image size is taken from the global work size.

  NOTE(review): this listing omits some source lines (braces and the
  computation of `alpha`, `gamma` and the `normalize` updates), so the
  comments describe only the visible statements.
*/
2790 __kernel
void RotationalBlur(
const __global CLQuantum *image,
2791 const unsigned int number_channels,
const unsigned int channel,
2792 const float2 blurCenter,__constant
float *cos_theta,
2793 __constant
float *sin_theta,
const unsigned int cossin_theta_size,
2794 __global CLQuantum *filteredImage)
2796 const int x = get_global_id(0);
2797 const int y = get_global_id(1);
2798 const int columns = get_global_size(0);
2799 const int rows = get_global_size(1);
/* Offset of this pixel from the blur centre and its distance to it. */
2800 unsigned int step = 1;
2801 float center_x = (float) x - blurCenter.x;
2802 float center_y = (
float) y - blurCenter.y;
2803 float radius = hypot(center_x, center_y);
2805 float blur_radius = hypot(blurCenter.x, blurCenter.y);
/* Table stride: blur_radius / radius truncated to an integer, so pixels
   close to the centre use fewer table entries; capped at
   cossin_theta_size-1.
   NOTE(review): the table loop below advances by `step`, so step must not
   be 0; the lines between these statements are not visible -- confirm a
   zero guard exists there. */
2807 if (radius > MagickEpsilon)
2809 step = (
unsigned int) (blur_radius / radius);
2812 if (step >= cossin_theta_size)
2813 step = cossin_theta_size-1;
2816 float4 result = 0.0f;
2817 float normalize = 0.0f;
2820 for (
unsigned int i=0; i<cossin_theta_size; i+=step)
/* Rotate the offset by table entry i, round, and clamp into the image. */
2822 int cx = ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns);
2823 int cy = ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f,rows);
2825 float4 pixel = ReadAllChannels(image, number_channels, columns, cx, cy);
/* Images with 2 or 4 channels are treated as having alpha: colour is
   accumulated weighted by `alpha`, the alpha channel unweighted. */
2827 if ((number_channels == 4) || (number_channels == 2))
2833 result.x += alpha * pixel.x;
2834 result.y += alpha * pixel.y;
2835 result.z += alpha * pixel.z;
2836 result.w += pixel.w;
/* Normalise the accumulated sums and write the result. */
2846 if ((number_channels == 4) || (number_channels == 2))
2852 result.w *= normalize;
2855 result *= normalize;
2857 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, result);
/*
  UnsharpMaskBlurColumn: vertical blur plus unsharp-mask step for one image
  column per work-group column (groupX = get_group_id(0)).

  The already row-blurred data (blurRowData) needed by the work-group,
  including `radius` rows above and below, is cached in the __local
  cachedData, and the `width` filter taps are cached in cachedFilter.  Each
  work-item then computes the blurred value for row cy = get_global_id(1),
  subtracts it from the source pixel and adds gain * difference back unless
  twice the absolute difference is below quantumThreshold.

  NOTE(review): this listing omits some source lines (braces, the
  declaration of `i`, the definition of `quantumThreshold` and any row
  bounds test), so the comments describe only the visible statements.
*/
2874 __kernel
void UnsharpMaskBlurColumn(
const __global CLQuantum* image,
2875 const __global float4 *blurRowData,
const unsigned int number_channels,
2876 const ChannelType channel,
const unsigned int columns,
2877 const unsigned int rows,__local float4* cachedData,
2878 __local
float* cachedFilter,
const __global
float *filter,
2879 const unsigned int width,
const float gain,
const float threshold,
2880 __global CLQuantum *filteredImage)
2882 const unsigned int radius = (width-1)/2;
/* Row range [groupStartY, groupStopY) needed by this work-group. */
2885 const int groupX = get_group_id(0);
2886 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
2887 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
/* Range fully inside the image: one strided copy (stride = columns). */
2889 if ((groupStartY >= 0) && (groupStopY < rows))
2891 event_t e = async_work_group_strided_copy(cachedData,
2892 blurRowData+groupStartY*columns+groupX,groupStopY-groupStartY,columns,0);
2893 wait_group_events(1,&e);
/* Otherwise copy element by element, clamping the row index into the image. */
2897 for (
int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1))
2898 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,rows)*columns + groupX];
2900 barrier(CLK_LOCAL_MEM_FENCE);
/* Cache the filter taps as well. */
2903 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
2904 wait_group_events(1,&e);
2907 const int cy = get_global_id(1);
2911 float4 blurredPixel = (float4) 0.0f;
/* Taps in groups of eight.  The inner loop advances i together with j, so
   the tap must be indexed by i alone: indexing with i+j would step by two
   per iteration, skipping every other tap and reading past the group. */
2915 for ( ; i+7 < width; )
2917 for (
int j=0; j < 8; j++, i++)
2918 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
/* Remaining (width % 8) taps. */
2921 for ( ; i < width; i++)
2922 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
/* Unsharp mask: keep the source pixel where the difference is below the
   threshold, otherwise add gain * difference. */
2924 float4 inputImagePixel = ReadFloat4(image,number_channels,columns,groupX,cy,channel);
2925 float4 outputPixel = inputImagePixel - blurredPixel;
2929 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
2930 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
2933 WriteFloat4(filteredImage,number_channels,columns,groupX,cy,channel,outputPixel);
/*
  UnsharpMask: two-pass separable blur followed by the unsharp-mask step.

  Pass 1 (horizontal): for every row the work-group needs (its own rows plus
  `radius` rows above and below), each work-item filters `width` horizontal
  taps around column x and stores the result in the __local `pixels` array.
  Source coordinates outside the image are mirrored back inside.
  Pass 2 (vertical): after a barrier, each work-item filters `width`
  vertically adjacent entries of `pixels`.
  Finally, for pixels inside the image, gain * (source - blurred) is added
  to the source unless twice the absolute difference is below
  quantumThreshold.

  NOTE(review): this listing omits some source lines (braces, the
  declarations and updates of `i`, `srcx`, `ix`, and the definition of
  `quantumThreshold`), so the comments describe only the visible statements.
*/
2939 __kernel
void UnsharpMask(
const __global CLQuantum *image,
const unsigned int number_channels,
2940 const ChannelType channel,__constant
float *filter,
const unsigned int width,
2941 const unsigned int columns,
const unsigned int rows,__local float4 *pixels,
2942 const float gain,
const float threshold,__global CLQuantum *filteredImage)
2944 const unsigned int x = get_global_id(0);
2945 const unsigned int y = get_global_id(1);
2947 const unsigned int radius = (width - 1) / 2;
/* Rows handled by this work-group: [baseRow, endRow); each work-item starts
   at its own row and strides by the local height. */
2949 int row = y - radius;
2950 int baseRow = get_group_id(1) * get_local_size(1) - radius;
2951 int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
2953 while (row < endRow) {
/* Mirror the source row back into [0, rows). */
2954 int srcy = (row < 0) ? -row : row;
2955 srcy = (srcy >= rows) ? (2 * rows - srcy - 1) : srcy;
2957 float4 value = 0.0f;
2959 int ix = x - radius;
/* Horizontal taps in groups of eight, mirroring the column index. */
2962 while (i + 7 < width) {
2963 for (
int j = 0; j < 8; ++j) {
2965 srcx = (srcx < 0) ? -srcx : srcx;
2966 srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2967 value += filter[i + j] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
/* Remaining horizontal taps. */
2974 int srcx = (ix < 0) ? -ix : ix;
2975 srcx = (srcx >= columns) ? (2 * columns - srcx - 1) : srcx;
2976 value += filter[i] * ReadFloat4(image, number_channels, columns, srcx, srcy, channel);
/* Store the row-filtered value; rows are get_local_size(0) entries wide. */
2980 pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
2981 row += get_local_size(1);
2984 barrier(CLK_LOCAL_MEM_FENCE);
2986 const int px = get_local_id(0);
2987 const int py = get_local_id(1);
2988 const int prp = get_local_size(0);
2989 float4 value = (float4)(0.0f);
/* Vertical taps in groups of eight.  Each of the eight rows (py + i + j)
   must be weighted by its own tap filter[i + j], matching the horizontal
   pass; using filter[i] for all eight applies one weight to the whole group. */
2992 while (i + 7 < width) {
2993 for (
int j = 0; j < 8; ++j)
2994 value += (float4)(filter[i + j]) * pixels[px + (py + i + j) * prp];
/* Remaining vertical taps. */
2998 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
/* Unsharp mask, only for work-items that map to a real pixel. */
3002 if ((x < columns) && (y < rows)) {
3003 float4 srcPixel = ReadFloat4(image, number_channels, columns, x, y, channel);
3004 float4 diff = srcPixel - value;
3008 int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3009 value = select(srcPixel + diff * gain, srcPixel, mask);
3011 WriteFloat4(filteredImage, number_channels, columns, x, y, channel, value);
/*
  WaveletDenoise: tile-based denoising kernel, declared with a required
  work-group size of (64, 4, 1).

  Each work-group processes a 64x64 tile (tileSize) whose outer `pad`
  pixels overlap neighbouring tiles; consecutive tiles are
  (tileSize - 2*pad) pixels apart.  A work-item owns column
  get_local_id(0) of the tile and every 4th row starting at
  get_local_id(1), i.e. 16 rows, which it keeps in the private `stage`
  array (16 entries per channel).  Channels are processed one at a time
  through the 64x64 __local float `buffer`.

  For each pass a horizontal and then a vertical 3-tap smoothing with
  weights 0.25 / 0.5 / 0.25 and tap distance radius = 1 << pass is applied
  (indices mirrored at the tile edges), and the difference between the
  unsmoothed and smoothed value is accumulated into `accum` after a
  threshold test against threshold * noise[pass].

  NOTE(review): this listing omits some source lines (braces, the
  declarations of `tmp`, `accum`, `pixel`, the thresholding bodies and the
  write-back into `stage`), so the comments describe only the visible
  statements.  `stage` has 48 entries and `noise` has 8, so the visible
  indexing requires max_channels <= 3 and passes <= 8 (and passes >= 1 for
  the shift in `pad`) -- presumably enforced by the host; confirm.
*/
3017 __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
3018 void WaveletDenoise(__global CLQuantum *srcImage,__global CLQuantum *dstImage,
3019 const
unsigned int number_channels,const
unsigned int max_channels,
3020 const
float threshold,const
int passes,const
unsigned int imageWidth,
3021 const
unsigned int imageHeight)
3023 const int pad = (1 << (passes - 1));
3024 const int tileSize = 64;
3025 const int tileRowPixels = 64;
3026 const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
3028 CLQuantum stage[48];
3030 local
float buffer[64 * 64];
/* Image coordinates of this work-item's column and of the tile's first row
   (both may be negative or beyond the image because of the padding). */
3032 int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
3033 int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
/* Load this work-item's rows into `stage`, mirroring coordinates that fall
   outside the image. */
3035 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3036 int pos = (mirrorTop(mirrorBottom(srcx), imageWidth) * number_channels) +
3037 (mirrorTop(mirrorBottom(srcy + i), imageHeight)) * imageWidth * number_channels;
3039 for (
int channel = 0; channel < max_channels; ++channel)
3040 stage[(i / 4) + (16 * channel)] = srcImage[pos + channel];
3043 for (
int channel = 0; channel < max_channels; ++channel) {
/* Publish this channel's values to the shared tile buffer as floats. */
3045 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3046 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[(i / 4) + (16 * channel)]);
3054 for (
int i = 0; i < 16; i++)
3057 for (
int pass = 0; pass < passes; ++pass) {
3058 const int radius = 1 << pass;
3059 const int x = get_local_id(0);
3060 const float thresh = threshold * noise[pass];
/* Horizontal smoothing within each owned row; the unsmoothed value is kept
   in tmp.  The barrier separates the neighbour reads from the overwrite. */
3063 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3064 const int offset = i * tileRowPixels;
3066 tmp[i / 4] = buffer[x + offset];
3067 pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
3068 barrier(CLK_LOCAL_MEM_FENCE);
3069 buffer[x + offset] = pixel;
3071 barrier(CLK_LOCAL_MEM_FENCE);
/* Vertical smoothing, then accumulate the thresholded detail
   (unsmoothed minus smoothed). */
3074 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3075 pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
3076 float delta = tmp[i / 4] - pixel;
3078 if (delta < -thresh)
3080 else if (delta > thresh)
3084 accum[i / 4] += delta;
3086 barrier(CLK_LOCAL_MEM_FENCE);
/* Before the last pass: copy tmp back into the shared buffer for the next
   pass.  The second loop adds tmp into accum (presumably the branch for
   the final pass; the `else` line is not visible). */
3088 if (pass < passes - 1)
3089 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3090 buffer[x + i * tileRowPixels] = tmp[i / 4];
3092 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3093 accum[i / 4] += tmp[i / 4];
3094 barrier(CLK_LOCAL_MEM_FENCE);
3097 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3100 barrier(CLK_LOCAL_MEM_FENCE);
/* Write back only the non-padded interior of the tile, and only positions
   that lie inside the image. */
3105 if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
3106 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3107 if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
3108 int pos = (srcx * number_channels) + ((srcy + i) * (imageWidth * number_channels));
3109 for (
int channel = 0; channel < max_channels; ++channel) {
3110 dstImage[pos + channel] = stage[(i / 4) + (16 * channel)];
3120 #endif // MAGICKCORE_OPENCL_SUPPORT
3122 #if defined(__cplusplus) || defined(c_plusplus)
3126 #endif // MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
Definition: composite.h:84
Definition: composite.h:72
Definition: composite.h:78
Definition: resize-private.h:31
Definition: colorspace.h:57
Definition: composite.h:28
Definition: resize-private.h:37
Definition: visual-effects.h:34
MagickExport void ConvertRGBToHSL(const double red, const double green, const double blue, double *hue, double *saturation, double *lightness)
Definition: gem.c:1099
Definition: statistic.h:125
Definition: resize-private.h:33
Definition: composite.h:91
Definition: colorspace.h:50
Definition: composite.h:30
Definition: composite.h:68
Definition: colorspace.h:46
Definition: colorspace.h:53
Definition: composite.h:95
Definition: composite.h:33
static double Hann(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:322
Definition: resize-private.h:40
Definition: composite.h:96
Definition: resize-private.h:29
PixelIntensityMethod
Definition: pixel.h:99
Definition: composite.h:54
static double Blackman(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:149
Definition: colorspace.h:32
Definition: composite.h:64
Definition: composite.h:83
Definition: composite.h:27
Definition: colorspace.h:33
Definition: colorspace.h:58
Definition: composite.h:35
Definition: composite.h:52
#define MagickPI
Definition: image-private.h:40
Definition: colorspace.h:36
Definition: colorspace.h:44
Definition: colorspace.h:47
static void ModulateHSL(const double percent_hue, const double percent_saturation, const double percent_lightness, double *red, double *green, double *blue)
Definition: enhance.c:3526
Definition: statistic.h:128
Definition: colorspace.h:45
#define MAGICKCORE_QUANTUM_DEPTH
Definition: magick-type.h:32
Definition: composite.h:49
Definition: colorspace.h:55
NoiseType
Definition: visual-effects.h:27
Definition: resize-private.h:38
#define MagickEpsilon
Definition: magick-type.h:114
Definition: colorspace.h:42
static Quantum ClampToQuantum(const MagickRealType quantum)
Definition: quantum.h:85
Definition: statistic.h:126
MagickPrivate void ConvertRGBToHSB(const double, const double, const double, double *, double *, double *)
Definition: visual-effects.h:36
Definition: visual-effects.h:35
Definition: resize-private.h:30
Definition: colorspace.h:39
Definition: composite.h:47
Definition: resize-private.h:41
Definition: composite.h:89
Definition: composite.h:76
Definition: composite.h:88
Definition: colorspace.h:52
Definition: composite.h:42
Definition: colorspace.h:37
Definition: composite.h:48
Definition: colorspace.h:54
Definition: colorspace.h:34
static double PerceptibleReciprocal(const double x)
Definition: pixel-accessor.h:234
Definition: composite.h:50
Definition: composite.h:38
Definition: composite.h:82
Definition: composite.h:74
Definition: composite.h:87
Definition: resize-private.h:32
Definition: composite.h:51
Definition: composite.h:60
Definition: composite.h:85
static Quantum ApplyFunction(Quantum pixel, const MagickFunction function, const size_t number_parameters, const double *parameters, ExceptionInfo *exception)
Definition: statistic.c:974
Definition: colorspace.h:59
Definition: composite.h:53
Definition: resize-private.h:36
Definition: colorspace.h:51
static void Contrast(const int sign, double *red, double *green, double *blue)
Definition: enhance.c:1372
#define SigmaMultiplicativeGaussian
Definition: visual-effects.h:30
Definition: composite.h:56
Definition: composite.h:46
Definition: statistic.h:124
Definition: composite.h:79
Definition: colorspace.h:40
Definition: composite.h:94
static double Triangle(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:543
Definition: resize-private.h:34
#define QuantumScale
Definition: magick-type.h:119
Definition: colorspace.h:41
Definition: resize-private.h:39
Definition: composite.h:71
ChannelType
Definition: pixel.h:33
Definition: colorspace.h:29
MagickExport void ConvertHSLToRGB(const double hue, const double saturation, const double lightness, double *red, double *green, double *blue)
Definition: gem.c:462
#define MaxMap
Definition: magick-type.h:79
#define MagickMax(x, y)
Definition: image-private.h:36
Definition: composite.h:67
Definition: composite.h:39
Definition: composite.h:45
Definition: composite.h:44
Definition: composite.h:86
Definition: resize-private.h:28
Definition: composite.h:97
Definition: composite.h:41
Definition: composite.h:59
Definition: colorspace.h:28
Definition: composite.h:93
Definition: colorspace.h:43
Definition: composite.h:70
Definition: composite.h:92
Definition: visual-effects.h:32
Definition: colorspace.h:48
Definition: resize-private.h:42
static double Sinc(const double, const ResizeFilter *)
Definition: composite.h:57
Definition: composite.h:36
Definition: composite.h:43
static double CubicBC(const double x, const ResizeFilter *resize_filter)
Definition: resize.c:207
Definition: visual-effects.h:31
Definition: composite.h:37
Definition: composite.h:66
Definition: statistic.h:127
ResizeWeightingFunctionType
Definition: resize-private.h:25
Definition: composite.h:98
Definition: colorspace.h:49
static double Hamming(const double x, const ResizeFilter *magick_unused(resize_filter))
Definition: resize.c:334
#define MagickMin(x, y)
Definition: image-private.h:37
ColorspaceType
Definition: colorspace.h:25
Definition: composite.h:32
Definition: colorspace.h:30
Definition: composite.h:31
Definition: colorspace.h:35
Definition: composite.h:55
Definition: composite.h:75
Definition: colorspace.h:31
CompositeOperator
Definition: composite.h:25
Definition: composite.h:77
Definition: colorspace.h:60
Definition: colorspace.h:38
Definition: composite.h:80
Definition: composite.h:29
Definition: colorspace.h:56
MagickPrivate void ConvertHSBToRGB(const double, const double, const double, double *, double *, double *)
Definition: composite.h:62
Definition: composite.h:73
Definition: composite.h:63
Definition: composite.h:69
Definition: composite.h:34
Definition: visual-effects.h:29
Definition: resize-private.h:27
Definition: composite.h:90
Definition: colorspace.h:27
MagickExport MagickRealType GetPixelIntensity(const Image *magick_restrict image, const Quantum *magick_restrict pixel)
Definition: pixel.c:2358
MagickFunction
Definition: statistic.h:122
Definition: composite.h:40
Definition: composite.h:81
Definition: resize-private.h:35
#define QuantumRange
Definition: magick-type.h:87
Definition: composite.h:58
Definition: composite.h:65
Definition: composite.h:61
Definition: visual-effects.h:33