19 #ifndef MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
20 #define MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H
22 #if defined(__cplusplus) || defined(c_plusplus)
26 #if defined(MAGICKCORE_OPENCL_SUPPORT)
/*
  String-building helpers for the embedded OpenCL program.  Each macro expands
  to adjacent string literals: a newline, a preprocessor directive whose hash
  sign and keyword are split across two literals, the stringified arguments,
  and a trailing newline so the directive ends up on a line of its own.
*/
/* Emits a define directive for VAR; the remaining arguments become its body. */
31 #define OPENCL_DEFINE(VAR,...) "\n #""define " #VAR " " #__VA_ARGS__ " \n"
/* Emits an elif directive with the stringified condition. */
32 #define OPENCL_ELIF(...) "\n #""elif " #__VA_ARGS__ " \n"
/* Emits an else directive. */
33 #define OPENCL_ELSE() "\n #""else " " \n"
/* Emits an endif directive. */
34 #define OPENCL_ENDIF() "\n #""endif " " \n"
/* Emits an if directive with the stringified condition. */
35 #define OPENCL_IF(...) "\n #""if " #__VA_ARGS__ " \n"
/* Stringifies arbitrary kernel source text and appends a newline. */
36 #define STRINGIFY(...) #__VA_ARGS__ "\n"
38 const char* accelerateKernels =
/*
  Macro definitions injected into the OpenCL program text.
*/
/* Alpha of a pixel: QuantumRange minus the w component. */
43 OPENCL_DEFINE(GetPixelAlpha(pixel),(QuantumRange-(pixel).w))
/*
  Noise scale factors used by mwcGenerateDifferentialNoise().  Every one of
  them expands to an expression in attenuate, so an identifier with that name
  must be in scope wherever the macro is used.
*/
44 OPENCL_DEFINE(SigmaUniform, (attenuate*0.015625f))
45 OPENCL_DEFINE(SigmaGaussian, (attenuate*0.015625f))
46 OPENCL_DEFINE(SigmaImpulse, (attenuate*0.1f))
47 OPENCL_DEFINE(SigmaLaplacian, (attenuate*0.0390625f))
48 OPENCL_DEFINE(SigmaMultiplicativeGaussian, (attenuate*0.5f))
49 OPENCL_DEFINE(SigmaPoisson, (attenuate*12.5f))
50 OPENCL_DEFINE(SigmaRandom, (attenuate))
51 OPENCL_DEFINE(TauGaussian, (attenuate*0.078125f))
/*
  Two-argument maximum and minimum.  Each argument appears twice in the
  expansion, so arguments with side effects would be evaluated twice.
*/
52 OPENCL_DEFINE(MagickMax(x, y), (((x) > (y)) ? (x) : (y)))
53 OPENCL_DEFINE(MagickMin(x, y), (((x) < (y)) ? (x) : (y)))
64 TransparentColorspace,
79 Rec601YCbCrColorspace,
81 Rec709YCbCrColorspace,
102 UndefinedCompositeOp,
104 ModulusAddCompositeOp,
108 ChangeMaskCompositeOp,
110 ColorBurnCompositeOp,
111 ColorDodgeCompositeOp,
113 CopyBlackCompositeOp,
117 CopyGreenCompositeOp,
118 CopyMagentaCompositeOp,
119 CopyOpacityCompositeOp,
121 CopyYellowCompositeOp,
128 DifferenceCompositeOp,
131 ExclusionCompositeOp,
132 HardLightCompositeOp,
136 LinearLightCompositeOp,
148 SoftLightCompositeOp,
154 ModulusSubtractCompositeOp,
155 ThresholdCompositeOp,
161 DivideDstCompositeOp,
164 PegtopLightCompositeOp,
165 VividLightCompositeOp,
167 LinearDodgeCompositeOp,
168 LinearBurnCompositeOp,
169 MathematicsCompositeOp,
170 DivideSrcCompositeOp,
172 DarkenIntensityCompositeOp,
173 LightenIntensityCompositeOp
194 MultiplicativeGaussianNoise,
/*
  Enumerators of PixelIntensityMethod (the opening of the typedef is not part
  of these lines).  The first value is pinned to 0 and the rest count up by
  one.  The names are used as case labels in GetPixelIntensity().
*/
205 UndefinedPixelIntensityMethod = 0,
206 AveragePixelIntensityMethod,
207 BrightnessPixelIntensityMethod,
208 LightnessPixelIntensityMethod,
209 Rec601LumaPixelIntensityMethod,
210 Rec601LuminancePixelIntensityMethod,
211 Rec709LumaPixelIntensityMethod,
212 Rec709LuminancePixelIntensityMethod,
213 RMSPixelIntensityMethod,
214 MSPixelIntensityMethod
215 } PixelIntensityMethod;
/*
  Enumerators of ResizeWeightingFunctionType (the opening of the typedef is
  not part of these lines).  BoxWeightingFunction is pinned to 0 and the rest
  count up by one.  The last enumerator is followed by a trailing comma.
*/
220 BoxWeightingFunction = 0,
221 TriangleWeightingFunction,
222 CubicBCWeightingFunction,
223 HanningWeightingFunction,
224 HammingWeightingFunction,
225 BlackmanWeightingFunction,
226 GaussianWeightingFunction,
227 QuadraticWeightingFunction,
228 JincWeightingFunction,
229 SincWeightingFunction,
230 SincFastWeightingFunction,
231 KaiserWeightingFunction,
232 WelshWeightingFunction,
233 BohmanWeightingFunction,
234 LagrangeWeightingFunction,
235 CosineWeightingFunction,
236 } ResizeWeightingFunctionType;
/*
  Channel selection bit flags; presumably the body of the ChannelType
  enumeration used in the kernel signatures (the typedef header and footer are
  not part of these lines, TODO confirm).  Several names are aliases for the
  same bit.
*/
/* Bit 0x0001, shared by gray and cyan. */
244 GrayChannel = 0x0001,
245 CyanChannel = 0x0001,
/* Bit 0x0002, shared by green and magenta. */
246 GreenChannel = 0x0002,
247 MagentaChannel = 0x0002,
/* Bit 0x0004, shared by blue and yellow. */
248 BlueChannel = 0x0004,
249 YellowChannel = 0x0004,
/* Bit 0x0008, shared by alpha, opacity and matte. */
250 AlphaChannel = 0x0008,
251 OpacityChannel = 0x0008,
252 MatteChannel = 0x0008,
/* Bit 0x0020, shared by black and index. */
253 BlackChannel = 0x0020,
254 IndexChannel = 0x0020,
/* 0x002F is the union of the bits 0x01, 0x02, 0x04, 0x08 and 0x20. */
255 CompositeChannels = 0x002F,
256 AllChannels = 0x7ffffff,
/* RGBChannels and GrayChannels share the value 0x0080. */
260 TrueAlphaChannel = 0x0040,
261 RGBChannels = 0x0080,
262 GrayChannels = 0x0080,
263 SyncChannels = 0x0100,
/*
  Everything except the opacity bit.  The SyncChannels bit is already set in
  AllChannels, so the OR does not change the value.
*/
264 DefaultChannels = ((AllChannels | SyncChannels) &~ OpacityChannel)
/*
  ScaleCharToQuantum: widen an 8-bit sample to the CLQuantum range.  One
  variant is emitted per quantum depth, selected by the generated if / elif
  chain on MAGICKCORE_QUANTUM_DEPTH.  The factors map 255 onto 255,
  255*257 = 65535 and 255*16843009 = 4294967295 respectively.
*/
272 OPENCL_IF((MAGICKCORE_QUANTUM_DEPTH == 8))
/* Depth 8: plain cast, no scaling. */
275 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
277 return((CLQuantum) value);
281 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 16))
/* Depth 16: multiply by 257. */
284 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
286 return((CLQuantum) (257.0f*value));
290 OPENCL_ELIF((MAGICKCORE_QUANTUM_DEPTH == 32))
/*
  Depth 32: multiply by 16843009.
  NOTE(review): this literal has no f suffix, unlike the depth 16 variant, so
  the product is computed in double precision; confirm that is intended.
*/
293 static inline CLQuantum ScaleCharToQuantum(
const unsigned char value)
295 return((CLQuantum) (16843009.0*value));
/*
  ClampToQuantum: convert a float to CLQuantum.  Two variants are visible; the
  first is guarded by MAGICKCORE_HDRI_SUPPORT == 1, and the directive that
  separates it from the second is not part of these lines.
*/
301 OPENCL_IF((MAGICKCORE_HDRI_SUPPORT == 1))
/* HDRI variant: cast only, the value is not range limited. */
304 static inline CLQuantum ClampToQuantum(const
float value)
306 return (CLQuantum) value;
/* Other variant: limit to [0, QuantumRange], add 0.5f, then cast. */
313 static inline CLQuantum ClampToQuantum(const
float value)
315 return (CLQuantum) (clamp(value, 0.0f, QuantumRange) + 0.5f);
/*
  ClampToCanvas: limit a coordinate to the valid index range of a canvas
  dimension.  Returns offset limited to [0, range - 1].
*/
322 static inline
int ClampToCanvas(const
int offset, const
int range)
324 return clamp(offset, (
int)0, range - 1);
/*
  ClampToCanvasWithHalo: like ClampToCanvas, but the allowed interval is
  widened by edge on one side, chosen by section:
    section != 0 : [-edge, range - 1]
    section == 0 : [0, range - 1 + edge]
*/
329 static inline int ClampToCanvasWithHalo(
const int offset,
const int range,
const int edge,
const int section)
331 return clamp(offset, section ? (
int)(0 - edge) : (
int)0, section ? (range - 1) : (range - 1 + edge));
/*
  ScaleQuantumToMap: turn a quantum value into a lookup-table index.  Values
  at or above MaxMap yield MaxMap; the second return casts the value to uint
  (the statement joining the two returns is not part of these lines).
  NOTE(review): no lower bound is visible; confirm callers never pass a
  negative value when CLQuantum is a floating-point type.
*/
336 static inline uint ScaleQuantumToMap(CLQuantum value)
338 if (value >= (CLQuantum)MaxMap)
339 return ((uint)MaxMap);
341 return ((uint)value);
/*
  PerceptibleReciprocal: reciprocal that avoids dividing by values close to
  zero.  Returns 1/x when the magnitude of x is at least MagickEpsilon,
  otherwise 1/MagickEpsilon carrying the sign of x (zero counts as positive).
*/
346 static inline float PerceptibleReciprocal(
const float x)
348 float sign = x < (float) 0.0 ? (
float)-1.0 : (float) 1.0;
349 return((sign*x) >= MagickEpsilon ? (float) 1.0 / x : sign*((
float) 1.0 / MagickEpsilon));
/* RoundToUnity: limit value to the interval [0, 1]. */
354 static inline float RoundToUnity(
const float value)
356 return clamp(value, 0.0f, 1.0f);
362 static inline CLQuantum getBlue(CLPixelType p) {
return p.x; }
363 static inline void setBlue(CLPixelType* p, CLQuantum value) { (*p).x = value; }
364 static inline float getBlueF4(float4 p) {
return p.x; }
365 static inline void setBlueF4(float4* p,
float value) { (*p).x = value; }
367 static inline CLQuantum getGreen(CLPixelType p) {
return p.y; }
368 static inline void setGreen(CLPixelType* p, CLQuantum value) { (*p).y = value; }
369 static inline float getGreenF4(float4 p) {
return p.y; }
370 static inline void setGreenF4(float4* p,
float value) { (*p).y = value; }
372 static inline CLQuantum getRed(CLPixelType p) {
return p.z; }
373 static inline void setRed(CLPixelType* p, CLQuantum value) { (*p).z = value; }
374 static inline float getRedF4(float4 p) {
return p.z; }
375 static inline void setRedF4(float4* p,
float value) { (*p).z = value; }
377 static inline CLQuantum getOpacity(CLPixelType p) {
return p.w; }
378 static inline void setOpacity(CLPixelType* p, CLQuantum value) { (*p).w = value; }
379 static inline float getOpacityF4(float4 p) {
return p.w; }
380 static inline void setOpacityF4(float4* p,
float value) { (*p).w = value; }
382 static inline void setGray(CLPixelType* p, CLQuantum value) { (*p).z = value; (*p).y = value; (*p).x = value; }
/*
  GetPixelIntensity: reduce a pixel to one float value.  The visible case
  labels select the formula by PixelIntensityMethod name.  Several lines of
  this function (the switch header, breaks, and the handling that follows the
  colorspace test) are not part of this excerpt.
*/
384 static inline float GetPixelIntensity(
const int method,
const int colorspace, CLPixelType p)
386 float red = getRed(p);
387 float green = getGreen(p);
388 float blue = getBlue(p);
/* Gray colorspace is tested first; its body is not visible here. */
392 if (colorspace == GRAYColorspace)
/* Arithmetic mean of the three channels. */
397 case AveragePixelIntensityMethod:
399 intensity = (red + green + blue) / 3.0;
/* Largest of the three channels. */
402 case BrightnessPixelIntensityMethod:
404 intensity = MagickMax(MagickMax(red, green), blue);
/* Midpoint between the smallest and the largest channel. */
407 case LightnessPixelIntensityMethod:
409 intensity = (MagickMin(MagickMin(red, green), blue) +
410 MagickMax(MagickMax(red, green), blue)) / 2.0;
/* Sum of squared channels; the divisor is on a line that is not visible. */
413 case MSPixelIntensityMethod:
415 intensity = (float)(((
float)red*red + green*green + blue*blue) /
/*
  Weighted sums.  In the visible lines the Luma and Luminance variants of each
  standard use identical weights.
*/
419 case Rec601LumaPixelIntensityMethod:
429 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
432 case Rec601LuminancePixelIntensityMethod:
442 intensity = 0.298839*red + 0.586811*green + 0.114350*blue;
445 case Rec709LumaPixelIntensityMethod:
456 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
459 case Rec709LuminancePixelIntensityMethod:
469 intensity = 0.212656*red + 0.715158*green + 0.072186*blue;
/* Square root of the sum of squares; the divisor is on a line not visible. */
472 case RMSPixelIntensityMethod:
474 intensity = (float)(sqrt((
float)red*red + green*green + blue*blue) /
/*
  MWC_AddMod64: modular addition helper, used by MWC_MulMod64 as
  MWC_AddMod64(r,b,M).  The definition of v is not visible; presumably it
  holds a+b (TODO confirm).  The visible test fires when v is at least M or
  when v, converted to float, compares below a converted to float.
*/
507 ulong MWC_AddMod64(ulong a, ulong b, ulong M)
511 if( (v>=M) || (convert_float(v) < convert_float(a)) )
/*
  MWC_MulMod64: modular multiplication built from MWC_AddMod64.  The visible
  steps add b into the accumulator r and double b, both modulo M; the loop
  that drives them is not visible (presumably it walks the bits of a, TODO
  confirm).
*/
522 ulong MWC_MulMod64(ulong a, ulong b, ulong M)
527 r=MWC_AddMod64(r,b,M);
528 b=MWC_AddMod64(b,b,M);
/*
  MWC_PowMod64: modular exponentiation built from MWC_MulMod64.  The visible
  steps multiply the accumulator acc by sqr and square sqr, both modulo M; the
  loop that drives them is not visible (presumably it walks the bits of e,
  TODO confirm).
*/
539 ulong MWC_PowMod64(ulong a, ulong e, ulong M)
544 acc=MWC_MulMod64(acc,sqr,M);
545 sqr=MWC_MulMod64(sqr,sqr,M);
/*
  MWC_SkipImpl_Mod64: advance a packed generator state by distance steps.
  curr carries the state as two 32-bit halves; the result uses the same
  layout.
*/
551 uint2 MWC_SkipImpl_Mod64(uint2 curr, ulong A, ulong M, ulong distance)
/* m = A raised to distance, modulo M. */
553 ulong m=MWC_PowMod64(A, distance, M);
/* Combine the two halves into one 64-bit value: curr.x * A + curr.y. */
554 ulong x=curr.x*(ulong)A+curr.y;
555 x=MWC_MulMod64(x, m, M);
/* Split back into quotient and remainder with respect to A. */
556 return (uint2)((uint)(x/A), (uint)(x%A));
/*
  MWC_SeedImpl_Mod64: compute the starting state of the calling work-item.
  The starting point is the constant MWC_BASEID advanced by a distance that
  depends on get_global_id(0), so work-items with different ids in
  dimension 0 get different starting points.
*/
559 uint2 MWC_SeedImpl_Mod64(ulong A, ulong M, uint vecSize, uint vecOffset, ulong streamBase, ulong streamGap)
566 enum{ MWC_BASEID = 4077358422479273989UL };
/* Distance: streamBase plus streamGap per (global id * vecSize + vecOffset). */
568 ulong dist=streamBase + (get_global_id(0)*vecSize+vecOffset)*streamGap;
569 ulong m=MWC_PowMod64(A, dist, M);
571 ulong x=MWC_MulMod64(MWC_BASEID, m, M);
/* Split into quotient and remainder with respect to A. */
572 return (uint2)((uint)(x/A), (uint)(x%A));
576 typedef struct{ uint x; uint c; uint seed0; ulong seed1; } mwc64x_state_t;
/*
  MWC64X_Step: advance the generator by one step.  X and C are declared on a
  line that is not visible; presumably they hold s->x and s->c (TODO confirm).
  The write-back of Xn and Cn into the state is also not visible.
*/
578 void MWC64X_Step(mwc64x_state_t *s)
/* Low 32 bits of seed0*X plus C (unsigned arithmetic wraps). */
582 uint Xn=s->seed0*X+C;
/* A wrapped sum is smaller than C, which signals a carry out of the low word. */
583 uint carry=(uint)(Xn<C);
/* High 32 bits of seed0*X plus the carry. */
584 uint Cn=mad_hi(s->seed0,X,carry);
/*
  MWC64X_Skip: jump the stream forward by distance steps.  Packs x and c into
  a uint2 and delegates to MWC_SkipImpl_Mod64 with seed0 as A and seed1 as M.
  Storing tmp back into the state happens on lines that are not visible.
*/
590 void MWC64X_Skip(mwc64x_state_t *s, ulong distance)
592 uint2 tmp=MWC_SkipImpl_Mod64((uint2)(s->x,s->c), s->seed0, s->seed1, distance);
/*
  MWC64X_SeedStreams: position the stream of the calling work-item.
  Delegates to MWC_SeedImpl_Mod64 with seed0 as A, seed1 as M, vecSize 1 and
  vecOffset 0, so the skip distance is baseOffset plus perStreamOffset times
  get_global_id(0).  Storing tmp into the state happens on lines that are not
  visible.
*/
597 void MWC64X_SeedStreams(mwc64x_state_t *s, ulong baseOffset, ulong perStreamOffset)
599 uint2 tmp=MWC_SeedImpl_Mod64(s->seed0, s->seed1, 1, 0, baseOffset, perStreamOffset);
/*
  MWC64X_NextUint: produce the next 32-bit value.  The output is x XOR c of
  the current state; advancing the state and returning res happen on lines
  that are not visible (TODO confirm).
*/
605 uint MWC64X_NextUint(mwc64x_state_t *s)
607 uint res=s->x ^ s->c;
/*
  mwcReadPseudoRandomValue: draw one value from the stream and scale it to a
  float by dividing by 0xffffffff.
*/
616 float mwcReadPseudoRandomValue(mwc64x_state_t* rng) {
617 return (1.0f * MWC64X_NextUint(rng)) / (float)(0xffffffff);
/*
  mwcGenerateDifferentialNoise: return pixel with noise of the requested type
  applied, drawing random values from r.  The Sigma and Tau macros scale the
  noise and all depend on attenuate.  Most case labels, the local
  declarations and the return are not part of this excerpt, so the grouping
  below is inferred from the macro names used on each line.
*/
621 float mwcGenerateDifferentialNoise(mwc64x_state_t* r, CLQuantum pixel, NoiseType noise_type,
float attenuate) {
/* First random draw, shared by the branches below. */
630 alpha=mwcReadPseudoRandomValue(r);
/* Uniform: offset proportional to alpha - 0.5. */
635 noise=(pixel+QuantumRange*SigmaUniform*(alpha-0.5f));
/*
  Gaussian: a second draw beta, then a radius gamma = sqrt(-2 log alpha) with
  cosine and sine of the angle 2*pi*beta give two deviates, sigma and tau.
  sigma is scaled by the square root of the pixel value, tau by QuantumRange.
*/
646 beta=mwcReadPseudoRandomValue(r);
647 gamma=sqrt(-2.0f*log(alpha));
648 sigma=gamma*cospi((2.0f*beta));
649 tau=gamma*sinpi((2.0f*beta));
650 noise=(float)(pixel+sqrt((
float) pixel)*SigmaGaussian*sigma+
651 QuantumRange*TauGaussian*tau);
/*
  Impulse: tests alpha against the lower and upper SigmaImpulse/2 tails.  The
  upper tail yields QuantumRange; the other results are not visible.
*/
658 if (alpha < (SigmaImpulse/2.0f))
661 if (alpha >= (1.0f-(SigmaImpulse/2.0f)))
662 noise=(
float)QuantumRange;
/*
  Laplacian: logarithmic offsets, with guards that avoid taking the log of a
  value at or below MagickEpsilon.  The condition that separates the alpha
  part from the beta part, and the assignment of beta, are not visible.
*/
671 if (alpha <= MagickEpsilon)
672 noise=(float) (pixel-QuantumRange);
674 noise=(float) (pixel+QuantumRange*SigmaLaplacian*log(2.0f*alpha)+
679 if (beta <= (0.5f*MagickEpsilon))
680 noise=(
float) (pixel+QuantumRange);
682 noise=(float) (pixel-QuantumRange*SigmaLaplacian*log(2.0f*beta)+0.5f);
/*
  Multiplicative Gaussian: the offset is proportional to the pixel value.
  sigma is only recomputed here when alpha exceeds MagickEpsilon; its value
  otherwise is set on a line that is not visible.
*/
685 case MultiplicativeGaussianNoise:
688 if (alpha > MagickEpsilon)
689 sigma=sqrt(-2.0f*log(alpha));
690 beta=mwcReadPseudoRandomValue(r);
691 noise=(float) (pixel+pixel*SigmaMultiplicativeGaussian*sigma*
692 cospi((
float) (2.0f*beta))/2.0f);
/*
  Poisson: counts iterations i while alpha stays above the threshold
  exp(-SigmaPoisson * QuantumScale * pixel), drawing a new beta each round
  (the update of alpha is not visible).  The result is QuantumRange * i
  divided by SigmaPoisson.
*/
700 poisson=exp(-SigmaPoisson*QuantumScale*pixel);
701 for (i=0; alpha > poisson; i++)
703 beta=mwcReadPseudoRandomValue(r);
706 noise=(float) (QuantumRange*i*PerceptibleReciprocal(SigmaPoisson));
/* Random: the result ignores the input pixel. */
711 noise=(float) (QuantumRange*SigmaRandom*alpha);
/*
  AddNoise: add noise to the selected channels of inputImage and write the
  result to filteredImage.  Each work-item owns a random stream and visits
  pixels at a stride of the work-group size.  The declaration and seeding of
  rng from seed0 and seed1, and the loop over count, are on lines that are
  not part of this excerpt.
*/
720 void AddNoise(
const __global CLPixelType* inputImage, __global CLPixelType* filteredImage
721 ,
const unsigned int inputPixelCount,
const unsigned int pixelsPerWorkItem
722 ,
const ChannelType channel
723 ,
const NoiseType noise_type,
const float attenuate
724 ,
const unsigned int seed0,
const unsigned int seed1
725 ,
const unsigned int numRandomNumbersPerPixel) {
/* Random numbers budgeted per work-item. */
731 uint span = pixelsPerWorkItem * numRandomNumbersPerPixel;
/* Base stream offset for this work-group. */
732 uint offset = span * get_local_size(0) * get_group_id(0);
/* MWC64X_SeedStreams adds span times get_global_id(0) to this offset. */
734 MWC64X_SeedStreams(&rng, offset, span);
/* First pixel of this work-item inside the slab owned by its work-group. */
736 uint pos = get_local_size(0) * get_group_id(0) * pixelsPerWorkItem + get_local_id(0);
738 uint count = pixelsPerWorkItem;
/* Positions past the end of the image are skipped. */
741 if (pos < inputPixelCount) {
742 CLPixelType p = inputImage[pos];
/* Each selected channel is replaced by its noisy, clamped value. */
744 if ((channel&RedChannel)!=0) {
745 setRed(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getRed(p),noise_type,attenuate)));
748 if ((channel&GreenChannel)!=0) {
749 setGreen(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getGreen(p),noise_type,attenuate)));
752 if ((channel&BlueChannel)!=0) {
753 setBlue(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getBlue(p),noise_type,attenuate)));
756 if ((channel & OpacityChannel) != 0) {
757 setOpacity(&p,ClampToQuantum(mwcGenerateDifferentialNoise(&rng,getOpacity(p),noise_type,attenuate)));
760 filteredImage[pos] = p;
/* Next pixel for this work-item is one work-group width further on. */
763 pos += get_local_size(0);
/*
  BlurRow: horizontal pass of a separable blur.  Reads pixels from im, applies
  the width-tap filter along the row and stores float4 results in
  filtered_im.  temp is a __local staging buffer shared by the work-group.
  Some lines of the kernel (braces, the declaration of i, the endif lines) are
  not part of this excerpt.
*/
789 __kernel
void BlurRow(__global CLPixelType *im, __global float4 *filtered_im,
790 const ChannelType channel, __constant
float *filter,
791 const unsigned int width,
792 const unsigned int imageColumns,
const unsigned int imageRows,
793 __local CLPixelType *temp)
795 const int x = get_global_id(0);
796 const int y = get_global_id(1);
798 const int columns = imageColumns;
/* Half width of the filter, and the number of pixels staged per group. */
800 const unsigned int radius = (width-1)/2;
801 const int wsize = get_local_size(0);
802 const unsigned int loadSize = wsize+width;
/* Image coordinates of the first work-item of this work-group. */
830 const int groupX=get_local_size(0)*get_group_id(0);
831 const int groupY=get_local_size(1)*get_group_id(1);
/*
  Cooperative load: work-items stride through the staging buffer and fetch
  the row segment that starts radius pixels left of the group, with column
  indices limited to the image by ClampToCanvas.
*/
834 for (
int i=get_local_id(0); i < loadSize; i=i+get_local_size(0))
837 temp[i] = im[y * columns + ClampToCanvas(i+groupX-radius, columns)];
/* All loads must be finished before any work-item reads temp. */
846 barrier(CLK_LOCAL_MEM_FENCE);
/* Work-items beyond the last column produce no output. */
849 if (get_global_id(0) < columns)
852 float4 result = (float4) 0;
/* UFACTOR is the unroll factor; 8 unless already defined. */
856 \n #ifndef UFACTOR \n
857 \n #define UFACTOR 8 \n
/* Main loop: blocks of UFACTOR taps. */
860 for ( ; i+UFACTOR < width; )
862 \n #pragma unroll UFACTOR\n
863 for (
int j=0; j < UFACTOR; j++, i++)
865 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
/* Remainder loop: the taps left over after the unrolled blocks. */
869 for ( ; i < width; i++)
871 result+=filter[i]*convert_float4(temp[i+get_local_id(0)]);
/* Each component goes through ClampToQuantum before it is stored as float. */
874 result.x = ClampToQuantum(result.x);
875 result.y = ClampToQuantum(result.y);
876 result.z = ClampToQuantum(result.z);
877 result.w = ClampToQuantum(result.w);
880 filtered_im[y*columns+x] = result;
/*
  BlurColumn: vertical pass of a separable blur.  Reads the float4 output of
  the row pass from blurRowData, applies the width-tap filter along the
  column and writes CLPixelType pixels to filtered_im.  temp is a __local
  staging buffer shared by the work-group.  Some lines of the kernel (braces,
  the declaration of i, the endif lines) are not part of this excerpt.
*/
893 __kernel
void BlurColumn(
const __global float4 *blurRowData, __global CLPixelType *filtered_im,
894 const ChannelType channel, __constant
float *filter,
895 const unsigned int width,
896 const unsigned int imageColumns,
const unsigned int imageRows,
897 __local float4 *temp)
899 const int x = get_global_id(0);
900 const int y = get_global_id(1);
904 const int columns = imageColumns;
905 const int rows = imageRows;
/* Half height of the filter, and the number of pixels staged per group. */
907 unsigned int radius = (width-1)/2;
908 const int wsize = get_local_size(1);
909 const unsigned int loadSize = wsize+width;
/* Image coordinates of the first work-item of this work-group. */
912 const int groupX=get_local_size(0)*get_group_id(0);
913 const int groupY=get_local_size(1)*get_group_id(1);
/*
  Cooperative load of the column segment that starts radius rows above the
  group, with row indices limited to the image by ClampToCanvas.
  NOTE(review): the column is addressed with groupX, which matches x only if
  the work-group is one work-item wide in dimension 0; confirm against the
  host-side launch configuration.
*/
918 for (
int i = get_local_id(1); i < loadSize; i=i+get_local_size(1))
920 temp[i] = blurRowData[ClampToCanvas(i+groupY-radius, rows) * columns + groupX];
/* All loads must be finished before any work-item reads temp. */
924 barrier(CLK_LOCAL_MEM_FENCE);
/* Work-items beyond the last row produce no output. */
927 if (get_global_id(1) < rows)
930 float4 result = (float4) 0;
/* UFACTOR is the unroll factor; 8 unless already defined. */
934 \n #ifndef UFACTOR \n
935 \n #define UFACTOR 8 \n
/* Main loop: blocks of UFACTOR taps. */
938 for ( ; i+UFACTOR < width; )
940 \n #pragma unroll UFACTOR \n
941 for (
int j=0; j < UFACTOR; j++, i++)
943 result+=filter[i]*temp[i+get_local_id(1)];
/* Remainder loop: the taps left over after the unrolled blocks. */
947 for ( ; i < width; i++)
949 result+=filter[i]*temp[i+get_local_id(1)];
952 result.x = ClampToQuantum(result.x);
953 result.y = ClampToQuantum(result.y);
954 result.z = ClampToQuantum(result.z);
955 result.w = ClampToQuantum(result.w);
/* Pack the four components into the output pixel type. */
958 filtered_im[y*columns+x] = (CLPixelType) (result.x,result.y,result.z,result.w);
/*
  ColorDodge: per-channel colour dodge blend.  CompositeColorDodge passes
  colour values already multiplied by their alpha as Sca (source) and Dca
  (destination), together with the alphas Sa and Da.
*/
977 static inline float ColorDodge(
const float Sca,
978 const float Sa,
const float Dca,
const float Da)
/*
  Saturated case.  With non-negative inputs, Sca equal to Sa always satisfies
  this test, so the division below is not reached with a zero divisor.
*/
983 if ((Sca*Da+Dca*Sa) >= Sa*Da)
984 return(Sa*Da+Sca*(1.0-Da)+Dca*(1.0-Sa));
985 return(Dca*Sa*Sa/(Sa-Sca)+Sca*(1.0-Da)+Dca*(1.0-Sa));
/*
  CompositeColorDodge: colour dodge composite of source pixel p over
  destination pixel q, written to composite.  The declarations of Sa, Da and
  gamma are on lines that are not part of this excerpt.
*/
1018 static inline void CompositeColorDodge(
const float4 *p,
1019 const float4 *q,float4 *composite) {
/* Alphas in [0, 1], derived from the stored opacities. */
1026 Sa=1.0f-QuantumScale*getOpacityF4(*p);
1027 Da=1.0f-QuantumScale*getOpacityF4(*q);
/* Combined alpha, stored back as an opacity. */
1028 gamma=RoundToUnity(Sa+Da-Sa*Da);
1029 setOpacityF4(composite, QuantumRange*(1.0-gamma));
/* Scale factor back to quantum range; a near-zero gamma is replaced by MagickEpsilon. */
1030 gamma=QuantumRange/(fabs(gamma) < MagickEpsilon ? MagickEpsilon : gamma);
/* Blend each colour channel using alpha-multiplied, normalised inputs. */
1031 setRedF4(composite,gamma*ColorDodge(QuantumScale*getRedF4(*p)*Sa,Sa,QuantumScale*
1032 getRedF4(*q)*Da,Da));
1033 setGreenF4(composite,gamma*ColorDodge(QuantumScale*getGreenF4(*p)*Sa,Sa,QuantumScale*
1034 getGreenF4(*q)*Da,Da));
1035 setBlueF4(composite,gamma*ColorDodge(QuantumScale*getBlueF4(*p)*Sa,Sa,QuantumScale*
1036 getBlueF4(*q)*Da,Da));
/*
  MagickPixelCompositePlus: additive composite of p and q.  alpha and beta are
  opacities in quantum units for p and q.  The declarations of Sa, Da and
  gamma are on lines that are not part of this excerpt.
*/
1041 static inline void MagickPixelCompositePlus(
const float4 *p,
1042 const float alpha,
const float4 *q,
1043 const float beta,float4 *composite)
/* Convert the opacities to alphas. */
1054 Sa=1.0-QuantumScale*alpha;
1055 Da=1.0-QuantumScale*beta;
/* Summed alpha, limited to [0, 1], stored back as an opacity. */
1056 gamma=RoundToUnity(Sa+Da);
1057 setOpacityF4(composite,(
float) QuantumRange*(1.0-gamma));
/* Normalise the alpha-weighted colour sums by the combined alpha. */
1058 gamma=PerceptibleReciprocal(gamma);
1059 setRedF4(composite,gamma*(Sa*getRedF4(*p)+Da*getRedF4(*q)));
1060 setGreenF4(composite,gamma*(Sa*getGreenF4(*p)+Da*getGreenF4(*q)));
1061 setBlueF4(composite,gamma*(Sa*getBlueF4(*p)+Da*getBlueF4(*q)));
/*
  MagickPixelCompositeBlend: blend p and q with the weights alpha and beta.
  Each weight scales the alpha of its pixel (QuantumRange minus the stored
  opacity); the scaled alpha is converted back to an opacity and handed to
  MagickPixelCompositePlus.
*/
1066 static inline void MagickPixelCompositeBlend(
const float4 *p,
1067 const float alpha,
const float4 *q,
1068 const float beta,float4 *composite)
1070 MagickPixelCompositePlus(p,(
float) (QuantumRange-alpha*
1071 (QuantumRange-getOpacityF4(*p))),q,(
float) (QuantumRange-beta*
1072 (QuantumRange-getOpacityF4(*q))),composite);
/*
  Composite: per-pixel composite of compositeImage onto image, in place.  One
  work-item handles the pixel at its two-dimensional global id.  The
  declarations of index, destination and source, the switch header and
  several braces are on lines that are not part of this excerpt.
*/
1078 void Composite(__global CLPixelType *image,
1079 const unsigned int imageWidth,
1080 const unsigned int imageHeight,
1081 const unsigned int imageMatte,
1082 const __global CLPixelType *compositeImage,
1083 const unsigned int compositeWidth,
1084 const unsigned int compositeHeight,
1085 const unsigned int compositeMatte,
1086 const unsigned int compose,
1087 const ChannelType channel,
1088 const float destination_dissolve,
1089 const float source_dissolve) {
1092 index.x = get_global_id(0);
1093 index.y = get_global_id(1);
/* Work-items outside the image are filtered out here. */
1096 if (index.x >= imageWidth
1097 || index.y >= imageHeight) {
/* Destination pixel, copied into a float4. */
1100 const CLPixelType inputPixel = image[index.y*imageWidth+index.x];
1102 setRedF4(&destination,getRed(inputPixel));
1103 setGreenF4(&destination,getGreen(inputPixel));
1104 setBlueF4(&destination,getBlue(inputPixel));
/*
  Source pixel.
  NOTE(review): the composite image is indexed with imageWidth, and
  compositeWidth and compositeHeight are not used in the visible lines;
  presumably both images are expected to have the same size, confirm against
  the caller.
*/
1107 const CLPixelType compositePixel
1108 = compositeImage[index.y*imageWidth+index.x];
1110 setRedF4(&source,getRed(compositePixel));
1111 setGreenF4(&source,getGreen(compositePixel));
1112 setBlueF4(&source,getBlue(compositePixel));
/* Without a matte channel the opacity is taken as 0. */
1114 if (imageMatte != 0) {
1115 setOpacityF4(&destination,getOpacity(inputPixel));
1118 setOpacityF4(&destination,0.0f);
1121 if (compositeMatte != 0) {
1122 setOpacityF4(&source,getOpacity(compositePixel));
1125 setOpacityF4(&source,0.0f);
/* The result starts as the destination and is replaced by the chosen operator. */
1128 float4 composite=destination;
1130 CompositeOperator op = (CompositeOperator)compose;
1132 case ColorDodgeCompositeOp:
1133 CompositeColorDodge(&source,&destination,&composite);
1135 case BlendCompositeOp:
1136 MagickPixelCompositeBlend(&source,source_dissolve,&destination,
1137 destination_dissolve,&composite);
/* Convert back to the packed pixel type and store in place. */
1144 CLPixelType outputPixel;
1145 setRed(&outputPixel, ClampToQuantum(getRedF4(composite)));
1146 setGreen(&outputPixel, ClampToQuantum(getGreenF4(composite)));
1147 setBlue(&outputPixel, ClampToQuantum(getBlueF4(composite)));
1148 setOpacity(&outputPixel, ClampToQuantum(getOpacityF4(composite)));
1149 image[index.y*imageWidth+index.x] = outputPixel;
/*
  ConvertRGBToHSB: convert a pixel to hue, saturation and brightness, returned
  in the x, y and z components of a float3.  Some lines (including any guard
  around the division by tmax) are not part of this excerpt.
*/
1167 static inline float3 ConvertRGBToHSB(CLPixelType pixel) {
1168 float3 HueSaturationBrightness;
1169 HueSaturationBrightness.x = 0.0f;
1170 HueSaturationBrightness.y = 0.0f;
1171 HueSaturationBrightness.z = 0.0f;
1173 float r=(float) getRed(pixel);
1174 float g=(float) getGreen(pixel);
1175 float b=(float) getBlue(pixel);
1177 float tmin=MagickMin(MagickMin(r,g),b);
1178 float tmax= MagickMax(MagickMax(r,g),b);
/* Saturation is the channel spread relative to the maximum; brightness is the normalised maximum. */
1181 float delta=tmax-tmin;
1182 HueSaturationBrightness.y=delta/tmax;
1183 HueSaturationBrightness.z=QuantumScale*tmax;
/* Hue is only computed when the channels differ. */
1185 if (delta != 0.0f) {
/* Sector base 0, 2 or 4 depending on which channel is the maximum. */
1186 HueSaturationBrightness.x = ((r == tmax)?0.0f:((g == tmax)?2.0f:4.0f));
1187 HueSaturationBrightness.x += ((r == tmax)?(g-b):((g == tmax)?(b-r):(r-g)))/delta;
1188 HueSaturationBrightness.x/=6.0f;
/*
  NOTE(review): this adds 1 when the hue is already non-negative and nothing
  when it is negative.  ConvertHSBToRGB only uses the fractional part
  hue - floor(hue), so the offset has no effect on the round trip; confirm no
  other consumer relies on the hue being in [0, 1).
*/
1189 HueSaturationBrightness.x += (HueSaturationBrightness.x < 0.0f)?0.0f:1.0f;
1192 return HueSaturationBrightness;
/*
  ConvertHSBToRGB: convert hue, saturation and brightness (x, y and z of the
  argument) back to a pixel.  The declarations of rgb, ih and f, the final
  alternatives of the three selection chains and the return are on lines that
  are not part of this excerpt.
*/
1195 static inline CLPixelType ConvertHSBToRGB(float3 HueSaturationBrightness) {
1197 float hue = HueSaturationBrightness.x;
1198 float brightness = HueSaturationBrightness.z;
1199 float saturation = HueSaturationBrightness.y;
/* Zero saturation: all three channels get the scaled brightness. */
1203 if (saturation == 0.0f) {
1204 setRed(&rgb,ClampToQuantum(QuantumRange*brightness));
1205 setGreen(&rgb,getRed(rgb));
1206 setBlue(&rgb,getRed(rgb));
/* Fractional part of the hue scaled to six sectors. */
1210 float h=6.0f*(hue-floor(hue));
/* Candidate channel levels for the sector. */
1212 float p=brightness*(1.0f-saturation);
1213 float q=brightness*(1.0f-saturation*f);
1214 float t=brightness*(1.0f-(saturation*(1.0f-f)));
1216 float clampedBrightness = ClampToQuantum(QuantumRange*brightness);
1217 float clamped_t = ClampToQuantum(QuantumRange*t);
1218 float clamped_p = ClampToQuantum(QuantumRange*p);
1219 float clamped_q = ClampToQuantum(QuantumRange*q);
/* Pick the level of each channel from the sector index ih. */
1221 setRed(&rgb, (ih == 1)?clamped_q:
1222 (ih == 2 || ih == 3)?clamped_p:
1223 (ih == 4)?clamped_t:
1226 setGreen(&rgb, (ih == 1 || ih == 2)?clampedBrightness:
1227 (ih == 3)?clamped_q:
1228 (ih == 4 || ih == 5)?clamped_p:
1231 setBlue(&rgb, (ih == 2)?clamped_t:
1232 (ih == 3 || ih == 4)?clampedBrightness:
1233 (ih == 5)?clamped_q:
/*
  Contrast: adjust the contrast of every pixel in place.  sharpen selects the
  direction: non-zero increases contrast, zero decreases it.  The row stride
  is get_global_size(0), so the kernel is presumably launched with a global
  size equal to the image dimensions (TODO confirm).
*/
1239 __kernel
void Contrast(__global CLPixelType *im,
const unsigned int sharpen)
1242 const int sign = sharpen!=0?1:-1;
1243 const int x = get_global_id(0);
1244 const int y = get_global_id(1);
1245 const int columns = get_global_size(0);
1246 const int c = x + y * columns;
1248 CLPixelType pixel = im[c];
1249 float3 HueSaturationBrightness = ConvertRGBToHSB(pixel);
1250 float brightness = HueSaturationBrightness.z;
/* Move the brightness half way towards (or away from) a sine-shaped curve. */
1251 brightness+=0.5f*sign*(0.5f*(sinpi(brightness-0.5f)+1.0f)-brightness);
1252 brightness = clamp(brightness,0.0f,1.0f);
1253 HueSaturationBrightness.z = brightness;
/* Convert back and keep the original w component. */
1255 CLPixelType filteredPixel = ConvertHSBToRGB(HueSaturationBrightness);
1256 filteredPixel.w = pixel.w;
1257 im[c] = filteredPixel;
/*
  Histogram: accumulate an intensity histogram of im.  The parameter that
  declares method is on a line that is not part of this excerpt.  The row
  stride is get_global_size(0), so the kernel is presumably launched with a
  global size equal to the image dimensions (TODO confirm).
*/
1276 __kernel
void Histogram(__global CLPixelType * restrict im,
1277 const ChannelType channel,
1279 const int colorspace,
1280 __global uint4 * restrict histogram)
1282 const int x = get_global_id(0);
1283 const int y = get_global_id(1);
1284 const int columns = get_global_size(0);
1285 const int c = x + y * columns;
/* Only the synchronised-channel case is visible here. */
1286 if ((channel & SyncChannels) != 0)
1288 float intensity = GetPixelIntensity(method, colorspace,im[c]);
1289 uint pos = ScaleQuantumToMap(ClampToQuantum(intensity));
/* Atomically bump the third uint of the bin, which is its z component. */
1290 atomic_inc((__global uint *)(&(histogram[pos]))+2);
/*
  ContrastStretch: remap the selected channels of every pixel through
  stretch_map, in place.  A channel is only remapped when its white and black
  levels differ.  The declaration of ePos, the load of oValue and the initial
  values of red, green, blue and opacity are on lines that are not part of
  this excerpt.
*/
1303 __kernel
void ContrastStretch(__global CLPixelType * restrict im,
1304 const ChannelType channel,
1305 __global CLPixelType * restrict stretch_map,
1306 const float4 white,
const float4 black)
1308 const int x = get_global_id(0);
1309 const int y = get_global_id(1);
1310 const int columns = get_global_size(0);
1311 const int c = x + y * columns;
1314 CLPixelType oValue, eValue;
1315 CLQuantum red, green, blue, opacity;
/* Red: look up the map entry for the old value and take its red component. */
1320 if ((channel & RedChannel) != 0)
1322 if (getRedF4(white) != getRedF4(black))
1324 ePos = ScaleQuantumToMap(getRed(oValue));
1325 eValue = stretch_map[ePos];
1326 red = getRed(eValue);
/* Green. */
1330 if ((channel & GreenChannel) != 0)
1332 if (getGreenF4(white) != getGreenF4(black))
1334 ePos = ScaleQuantumToMap(getGreen(oValue));
1335 eValue = stretch_map[ePos];
1336 green = getGreen(eValue);
/* Blue. */
1340 if ((channel & BlueChannel) != 0)
1342 if (getBlueF4(white) != getBlueF4(black))
1344 ePos = ScaleQuantumToMap(getBlue(oValue));
1345 eValue = stretch_map[ePos];
1346 blue = getBlue(eValue);
/* Opacity. */
1350 if ((channel & OpacityChannel) != 0)
1352 if (getOpacityF4(white) != getOpacityF4(black))
1354 ePos = ScaleQuantumToMap(getOpacity(oValue));
1355 eValue = stretch_map[ePos];
1356 opacity = getOpacity(eValue);
/* Components are given in x, y, z, w order, which is blue, green, red, opacity. */
1361 im[c]=(CLPixelType)(blue, green, red, opacity);
/*
  ConvolveOptimized: two-dimensional convolution that stages the pixels and
  the filter in __local memory.  Each work-group first copies the image tile
  it needs, including the filter border, into pixelLocalCache and the filter
  into filterCache; after a barrier each work-item computes one output pixel.
  The declarations of blockID, imageAreaOrg, imageIndex and gamma, the
  accumulation statements inside the filter loops and several braces are on
  lines that are not part of this excerpt.
*/
1380 void ConvolveOptimized(
const __global CLPixelType *input, __global CLPixelType *output,
1381 const unsigned int imageWidth,
const unsigned int imageHeight,
1382 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1383 const uint matte,
const ChannelType channel, __local CLPixelType *pixelLocalCache, __local
float* filterCache) {
1386 blockID.x = get_group_id(0);
1387 blockID.y = get_group_id(1);
/* Image coordinates of the first pixel handled by this work-group. */
1391 imageAreaOrg.x = blockID.x * get_local_size(0);
1392 imageAreaOrg.y = blockID.y * get_local_size(1);
/* Offset of the filter centre. */
1394 int2 midFilterDimen;
1395 midFilterDimen.x = (filterWidth-1)/2;
1396 midFilterDimen.y = (filterHeight-1)/2;
/* Origin and size of the cached tile: the group area grown by the filter border. */
1398 int2 cachedAreaOrg = imageAreaOrg - midFilterDimen;
1401 int2 cachedAreaDimen;
1402 cachedAreaDimen.x = get_local_size(0) + filterWidth - 1;
1403 cachedAreaDimen.y = get_local_size(1) + filterHeight - 1;
/* Cooperative load: work-items stride through the tile in linear order. */
1406 int localID = get_local_id(1)*get_local_size(0)+get_local_id(0);
1407 int cachedAreaNumPixels = cachedAreaDimen.x * cachedAreaDimen.y;
1408 int groupSize = get_local_size(0) * get_local_size(1);
1409 for (
int i = localID; i < cachedAreaNumPixels; i+=groupSize) {
1411 int2 cachedAreaIndex;
1412 cachedAreaIndex.x = i % cachedAreaDimen.x;
1413 cachedAreaIndex.y = i / cachedAreaDimen.x;
1415 int2 imagePixelIndex;
1416 imagePixelIndex = cachedAreaOrg + cachedAreaIndex;
/* Coordinates outside the image are limited to the nearest edge pixel. */
1420 imagePixelIndex.x = ClampToCanvas(imagePixelIndex.x, imageWidth);
1421 imagePixelIndex.y = ClampToCanvas(imagePixelIndex.y, imageHeight);
1423 pixelLocalCache[i] = input[imagePixelIndex.y * imageWidth + imagePixelIndex.x];
/* Copy the filter coefficients the same way. */
1427 for (
int i = localID; i < filterHeight*filterWidth; i+=groupSize) {
1428 filterCache[i] = filter[i];
/* Both caches must be complete before any work-item reads them. */
1430 barrier(CLK_LOCAL_MEM_FENCE);
1434 imageIndex.x = imageAreaOrg.x + get_local_id(0);
1435 imageIndex.y = imageAreaOrg.y + get_local_id(1);
/* The bounds test comes after the barrier, so every work-item took part in the loads. */
1438 if (imageIndex.x >= imageWidth
1439 || imageIndex.y >= imageHeight) {
1443 int filterIndex = 0;
1444 float4 sum = (float4)0.0f;
/* Path taken when opacity is not selected or the image has no matte. */
1446 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1447 int cacheIndexY = get_local_id(1);
1448 for (
int j = 0; j < filterHeight; j++) {
1449 int cacheIndexX = get_local_id(0);
1450 for (
int i = 0; i < filterWidth; i++) {
1451 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1452 float f = filterCache[filterIndex];
/* Other path: each coefficient is additionally weighted by the pixel alpha. */
1467 int cacheIndexY = get_local_id(1);
1468 for (
int j = 0; j < filterHeight; j++) {
1469 int cacheIndexX = get_local_id(0);
1470 for (
int i = 0; i < filterWidth; i++) {
1472 CLPixelType p = pixelLocalCache[cacheIndexY*cachedAreaDimen.x + cacheIndexX];
1473 float alpha = QuantumScale*(QuantumRange-p.w);
1474 float f = filterCache[filterIndex];
1475 float g = alpha * f;
/* Normalise the colour sums by the reciprocal of gamma. */
1488 gamma = PerceptibleReciprocal(gamma);
1489 sum.xyz = gamma*sum.xyz;
1491 CLPixelType outputPixel;
1492 outputPixel.x = ClampToQuantum(sum.x);
1493 outputPixel.y = ClampToQuantum(sum.y);
1494 outputPixel.z = ClampToQuantum(sum.z);
/* The w component is filtered only when opacity is selected; otherwise the input value is kept. */
1495 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1497 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
/*
  Convolve: two-dimensional convolution that reads every tap straight from
  the input buffer.  One work-item computes the output pixel at its global
  id.  The declarations of imageIndex and gamma, the accumulation statements
  inside the filter loops and several braces are on lines that are not part
  of this excerpt.
*/
1503 void Convolve(
const __global CLPixelType *input, __global CLPixelType *output,
1504 const uint imageWidth,
const uint imageHeight,
1505 __constant
float *filter,
const unsigned int filterWidth,
const unsigned int filterHeight,
1506 const uint matte,
const ChannelType channel) {
1509 imageIndex.x = get_global_id(0);
1510 imageIndex.y = get_global_id(1);
/* Work-items outside the image are filtered out here. */
1516 if (imageIndex.x >= imageWidth
1517 || imageIndex.y >= imageHeight)
/* Offset of the filter centre. */
1520 int2 midFilterDimen;
1521 midFilterDimen.x = (filterWidth-1)/2;
1522 midFilterDimen.y = (filterHeight-1)/2;
1524 int filterIndex = 0;
1525 float4 sum = (float4)0.0f;
/* Path taken when opacity is not selected or the image has no matte. */
1527 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
1528 for (
int j = 0; j < filterHeight; j++) {
1529 int2 inputPixelIndex;
/* Tap coordinates are limited to the nearest edge pixel. */
1530 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1531 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1532 for (
int i = 0; i < filterWidth; i++) {
1533 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1534 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1536 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1537 float f = filter[filterIndex];
/* Other path: each coefficient is additionally weighted by the pixel alpha. */
1552 for (
int j = 0; j < filterHeight; j++) {
1553 int2 inputPixelIndex;
1554 inputPixelIndex.y = imageIndex.y - midFilterDimen.y + j;
1555 inputPixelIndex.y = ClampToCanvas(inputPixelIndex.y, imageHeight);
1556 for (
int i = 0; i < filterWidth; i++) {
1557 inputPixelIndex.x = imageIndex.x - midFilterDimen.x + i;
1558 inputPixelIndex.x = ClampToCanvas(inputPixelIndex.x, imageWidth);
1560 CLPixelType p = input[inputPixelIndex.y * imageWidth + inputPixelIndex.x];
1561 float alpha = QuantumScale*(QuantumRange-p.w);
1562 float f = filter[filterIndex];
1563 float g = alpha * f;
/* Normalise the colour sums by the reciprocal of gamma. */
1576 gamma = PerceptibleReciprocal(gamma);
1577 sum.xyz = gamma*sum.xyz;
1580 CLPixelType outputPixel;
1581 outputPixel.x = ClampToQuantum(sum.x);
1582 outputPixel.y = ClampToQuantum(sum.y);
1583 outputPixel.z = ClampToQuantum(sum.z);
/* The w component is filtered only when opacity is selected; otherwise the input value is kept. */
1584 outputPixel.w = ((channel & OpacityChannel)!=0)?ClampToQuantum(sum.w):input[imageIndex.y * imageWidth + imageIndex.x].w;
1586 output[imageIndex.y * imageWidth + imageIndex.x] = outputPixel;
/*
  HullPass1: first pass of a hull filter step.  Compares each pixel with its
  neighbour at +offset and nudges the components by one 8-bit step towards
  the neighbour when they differ by at least two steps.  The declarations of
  neighbor, sv and sr, the alternative operand of the neighbour selection,
  and the conditions around the two loops and the w store (presumably polarity
  and matte, TODO confirm) are on lines that are not part of this excerpt.
*/
1604 __kernel
void HullPass1(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1605 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1606 ,
const int2 offset,
const int polarity,
const int matte) {
1608 int x = get_global_id(0);
1609 int y = get_global_id(1);
1611 CLPixelType v = inputImage[y*imageWidth+x];
1614 neighbor.y = y + offset.y;
1615 neighbor.x = x + offset.x;
/* The clamped copy is compared with the raw coordinate to detect neighbours outside the image. */
1617 int2 clampedNeighbor;
1618 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1619 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1621 CLPixelType r = (clampedNeighbor.x == neighbor.x
1622 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
/* Raise a component by one step when the neighbour is at least two steps above it. */
1638 \n #pragma unroll 4\n
1639 for (
unsigned int i = 0; i < 4; i++) {
1640 sv[i] = (sr[i] >= (sv[i]+ScaleCharToQuantum(2)))?(sv[i]+ScaleCharToQuantum(1)):sv[i];
/* Lower a component by one step when the neighbour is at least two steps below it. */
1644 \n #pragma unroll 4\n
1645 for (
unsigned int i = 0; i < 4; i++) {
1646 sv[i] = (sr[i] <= (sv[i]-ScaleCharToQuantum(2)))?(sv[i]-ScaleCharToQuantum(1)):sv[i];
/* Write the adjusted components back into the pixel. */
1651 v.x = (CLQuantum)sv[0];
1652 v.y = (CLQuantum)sv[1];
1653 v.z = (CLQuantum)sv[2];
1656 v.w = (CLQuantum)sv[3];
1658 outputImage[y*imageWidth+x] = v;
/*
  HullPass2: second pass of a hull filter step.  Looks at the neighbours at
  +offset (r) and -offset (s) and nudges each component by one 8-bit step
  unless one of two keep conditions holds.  The declarations of sv, sr and ss,
  the alternative operands of the neighbour selections, and the conditions
  around the two loops and the w store (presumably polarity and matte, TODO
  confirm) are on lines that are not part of this excerpt.
*/
1669 __kernel
void HullPass2(
const __global CLPixelType *inputImage, __global CLPixelType *outputImage
1670 ,
const unsigned int imageWidth,
const unsigned int imageHeight
1671 ,
const int2 offset,
const int polarity,
const int matte) {
1673 int x = get_global_id(0);
1674 int y = get_global_id(1);
1676 CLPixelType v = inputImage[y*imageWidth+x];
1678 int2 neighbor, clampedNeighbor;
/* Neighbour in the +offset direction. */
1680 neighbor.y = y + offset.y;
1681 neighbor.x = x + offset.x;
1682 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1683 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1685 CLPixelType r = (clampedNeighbor.x == neighbor.x
1686 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
/* Neighbour in the -offset direction. */
1690 neighbor.y = y - offset.y;
1691 neighbor.x = x - offset.x;
1692 clampedNeighbor.x = ClampToCanvas(neighbor.x, imageWidth);
1693 clampedNeighbor.y = ClampToCanvas(neighbor.y, imageHeight);
1695 CLPixelType s = (clampedNeighbor.x == neighbor.x
1696 && clampedNeighbor.y == neighbor.y)?inputImage[clampedNeighbor.y*imageWidth+clampedNeighbor.x]
/*
  Keep the component when s is less than two steps above it or r is not above
  it; otherwise raise it by one step.
*/
1719 \n #pragma unroll 4\n
1720 for (
unsigned int i = 0; i < 4; i++) {
1725 sv[i] =(( (int)( ss[i] < (sv[i]+ScaleCharToQuantum(2))) + (int) ( sr[i] <= sv[i] ) ) !=0) ? sv[i]:(sv[i]+ScaleCharToQuantum(1));
/*
  Keep the component when s is less than two steps below it or r is not below
  it; otherwise lower it by one step.
*/
1729 \n #pragma unroll 4\n
1730 for (
unsigned int i = 0; i < 4; i++) {
1734 sv[i] = (( (int)(ss[i] > (sv[i]-ScaleCharToQuantum(2))) + (int)( sr[i] >= sv[i] )) !=0) ? sv[i]:(sv[i]-ScaleCharToQuantum(1));
/* Write the adjusted components back into the pixel. */
1738 v.x = (CLQuantum)sv[0];
1739 v.y = (CLQuantum)sv[1];
1740 v.z = (CLQuantum)sv[2];
1743 v.w = (CLQuantum)sv[3];
1745 outputImage[y*imageWidth+x] = v;
/*
  Equalize: remap every pixel through equalize_map, in place.  In the visible
  synchronised-channel path all four channels are looked up separately but
  always take the red component of the map entry.  The declaration of ePos,
  the load of oValue and the initial values of red, green, blue and opacity
  are on lines that are not part of this excerpt.
*/
1767 __kernel
void Equalize(__global CLPixelType * restrict im,
1768 const ChannelType channel,
1769 __global CLPixelType * restrict equalize_map,
1770 const float4 white,
const float4 black)
1772 const int x = get_global_id(0);
1773 const int y = get_global_id(1);
1774 const int columns = get_global_size(0);
1775 const int c = x + y * columns;
1778 CLPixelType oValue, eValue;
1779 CLQuantum red, green, blue, opacity;
1784 if ((channel & SyncChannels) != 0)
/* The remap is only applied when the red white and black levels differ. */
1786 if (getRedF4(white) != getRedF4(black))
1788 ePos = ScaleQuantumToMap(getRed(oValue));
1789 eValue = equalize_map[ePos];
1790 red = getRed(eValue);
1791 ePos = ScaleQuantumToMap(getGreen(oValue));
1792 eValue = equalize_map[ePos];
1793 green = getRed(eValue);
1794 ePos = ScaleQuantumToMap(getBlue(oValue));
1795 eValue = equalize_map[ePos];
1796 blue = getRed(eValue);
1797 ePos = ScaleQuantumToMap(getOpacity(oValue));
1798 eValue = equalize_map[ePos];
1799 opacity = getRed(eValue);
/* Components are given in x, y, z, w order, which is blue, green, red, opacity. */
1802 im[c]=(CLPixelType)(blue, green, red, opacity);
1830 CLPixelType ApplyFunction(CLPixelType pixel,
const MagickFunction
function,
1831 const unsigned int number_parameters,
1832 __constant
float *parameters)
1834 float4 result = (float4) 0.0f;
1837 case PolynomialFunction:
1839 for (
unsigned int i=0; i < number_parameters; i++)
1840 result = result*(float4)QuantumScale*convert_float4(pixel) + parameters[i];
1841 result *= (float4)QuantumRange;
1844 case SinusoidFunction:
1846 float freq,phase,ampl,bias;
1847 freq = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1848 phase = ( number_parameters >= 2 ) ? parameters[1] : 0.0f;
1849 ampl = ( number_parameters >= 3 ) ? parameters[2] : 0.5f;
1850 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1851 result.x = QuantumRange*(ampl*sin(2.0f*MagickPI*
1852 (freq*QuantumScale*(
float)pixel.x + phase/360.0f)) + bias);
1853 result.y = QuantumRange*(ampl*sin(2.0f*MagickPI*
1854 (freq*QuantumScale*(
float)pixel.y + phase/360.0f)) + bias);
1855 result.z = QuantumRange*(ampl*sin(2.0f*MagickPI*
1856 (freq*QuantumScale*(
float)pixel.z + phase/360.0f)) + bias);
1857 result.w = QuantumRange*(ampl*sin(2.0f*MagickPI*
1858 (freq*QuantumScale*(
float)pixel.w + phase/360.0f)) + bias);
1861 case ArcsinFunction:
1863 float width,range,center,bias;
1864 width = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1865 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1866 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1867 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1869 result.x = 2.0f/width*(QuantumScale*(float)pixel.x - center);
1870 result.x = range/MagickPI*asin(result.x)+bias;
1871 result.x = ( result.x <= -1.0f ) ? bias - range/2.0f : result.x;
1872 result.x = ( result.x >= 1.0f ) ? bias + range/2.0f : result.x;
1874 result.y = 2.0f/width*(QuantumScale*(float)pixel.y - center);
1875 result.y = range/MagickPI*asin(result.y)+bias;
1876 result.y = ( result.y <= -1.0f ) ? bias - range/2.0f : result.y;
1877 result.y = ( result.y >= 1.0f ) ? bias + range/2.0f : result.y;
1879 result.z = 2.0f/width*(QuantumScale*(float)pixel.z - center);
1880 result.z = range/MagickPI*asin(result.z)+bias;
1881 result.z = ( result.z <= -1.0f ) ? bias - range/2.0f : result.x;
1882 result.z = ( result.z >= 1.0f ) ? bias + range/2.0f : result.x;
1885 result.w = 2.0f/width*(QuantumScale*(float)pixel.w - center);
1886 result.w = range/MagickPI*asin(result.w)+bias;
1887 result.w = ( result.w <= -1.0f ) ? bias - range/2.0f : result.w;
1888 result.w = ( result.w >= 1.0f ) ? bias + range/2.0f : result.w;
1890 result *= (float4)QuantumRange;
1893 case ArctanFunction:
1895 float slope,range,center,bias;
1896 slope = ( number_parameters >= 1 ) ? parameters[0] : 1.0f;
1897 center = ( number_parameters >= 2 ) ? parameters[1] : 0.5f;
1898 range = ( number_parameters >= 3 ) ? parameters[2] : 1.0f;
1899 bias = ( number_parameters >= 4 ) ? parameters[3] : 0.5f;
1900 result = (float4)MagickPI*(float4)slope*((float4)QuantumScale*convert_float4(pixel)-(float4)center);
1901 result = (float4)QuantumRange*((float4)range/(float4)MagickPI*atan(result) + (float4)bias);
1904 case UndefinedFunction:
1907 return (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
1908 ClampToQuantum(result.z), ClampToQuantum(result.w));
1920 __kernel
void ComputeFunction(__global CLPixelType *im,
1921 const ChannelType channel,
const MagickFunction
function,
1922 const unsigned int number_parameters, __constant
float *parameters)
1924 const int x = get_global_id(0);
1925 const int y = get_global_id(1);
1926 const int columns = get_global_size(0);
1927 const int c = x + y * columns;
1928 im[c] = ApplyFunction(im[c],
function, number_parameters, parameters);
/*
  Grayscale kernel: replaces one pixel of `im` by its gray intensity, in place.

  The pixel is addressed at c = x + y * get_global_size(0). Its red, green and
  blue components are converted to float, an intensity is computed by one of
  the methods below, and the clamped intensity is written with setGray(). The
  w component of the source pixel is carried over unchanged.

  NOTE(review): the declarations of red/green/blue/intensity and the switch
  header are not visible in this excerpt -- presumably the case labels belong
  to a switch on `method`; confirm. `colorspace` is not referenced in the
  visible lines.
  NOTE(review): literals such as 3.0 and 0.298839 carry no f suffix; confirm
  how the kernel is built if single-precision evaluation is expected.
*/
1945 __kernel
void Grayscale(__global CLPixelType *im,
1946 const int method,
const int colorspace)
1949 const int x = get_global_id(0);
1950 const int y = get_global_id(1);
1951 const int columns = get_global_size(0);
1952 const int c = x + y * columns;
1954 CLPixelType pixel = im[c];
1962 red=(float)getRed(pixel);
1963 green=(float)getGreen(pixel);
1964 blue=(float)getBlue(pixel);
1968 CLPixelType filteredPixel;
/* Arithmetic mean of the three color components. */
1972 case AveragePixelIntensityMethod:
1974 intensity=(red+green+blue)/3.0;
/* Largest of the three components. */
1977 case BrightnessPixelIntensityMethod:
1979 intensity=MagickMax(MagickMax(red,green),blue);
/* Midpoint between the smallest and the largest component. */
1982 case LightnessPixelIntensityMethod:
1984 intensity=(MagickMin(MagickMin(red,green),blue)+
1985 MagickMax(MagickMax(red,green),blue))/2.0;
/* Sum of squares divided by 3*QuantumRange. */
1988 case MSPixelIntensityMethod:
1990 intensity=(float) (((
float) red*red+green*green+
1991 blue*blue)/(3.0*QuantumRange));
/* The visible Luma and Luminance variants of Rec601 use identical weights. */
1994 case Rec601LumaPixelIntensityMethod:
2004 intensity=0.298839*red+0.586811*green+0.114350*blue;
2007 case Rec601LuminancePixelIntensityMethod:
2017 intensity=0.298839*red+0.586811*green+0.114350*blue;
/* Likewise for the two Rec709 variants. */
2020 case Rec709LumaPixelIntensityMethod:
2031 intensity=0.212656*red+0.715158*green+0.072186*blue;
2034 case Rec709LuminancePixelIntensityMethod:
2044 intensity=0.212656*red+0.715158*green+0.072186*blue;
/* Square root of the sum of squares, divided by sqrt(3). */
2047 case RMSPixelIntensityMethod:
2049 intensity=(float) (sqrt((
float) red*red+green*green+
2050 blue*blue)/sqrt(3.0));
2056 setGray(&filteredPixel, ClampToQuantum(intensity));
/* Keep the w component of the source pixel as is. */
2058 filteredPixel.w = pixel.w;
2060 im[c] = filteredPixel;
/*
  Reflect a negative index about zero: -k maps to k, while zero and positive
  values pass through unchanged.
*/
static inline int mirrorBottom(int value)
{
  if (value < 0)
    return -value;
  return value;
}
/*
  Reflect an index that lies at or beyond `width` back below it:
  width maps to width-1, width+1 to width-2, and so on. Indices below
  `width` pass through unchanged.
*/
static inline int mirrorTop(int value, int width)
{
  if (value < width)
    return value;
  return 2 * width - value - 1;
}
/*
  LocalContrastBlurRow kernel: horizontal pass of a weighted blur over a
  luma-like value, written to the float buffer `tmpImage`.

  The row is selected by get_global_id(1); within it the work-item starts at
  column get_local_id(0) and advances by get_local_size(0), so the items of a
  work-group share the columns of a row between them. For each column i the
  weighted sum of dot(RGB, pixel) over neighbouring columns is divided by
  (radius + 1)^2 and stored at tmpImage[i + y * imageWidth].

  NOTE(review): `radius`, `sum` and `j` are used but their declarations, and
  the updates of `weight` and `j`, are not visible in this excerpt. `dstImage`
  is not referenced in the visible lines.
*/
2086 __kernel
void LocalContrastBlurRow(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *tmpImage,
2088 const int imageWidth,
2089 const int imageHeight)
/* Per-channel weights applied with dot(); the fourth component gets weight 0. */
2091 const float4 RGB = ((float4)(0.2126f, 0.7152f, 0.0722f, 0.0f));
2093 int x = get_local_id(0);
2094 int y = get_global_id(1);
2096 if ((x >= imageWidth) || (y >= imageHeight))
2099 global CLPixelType *src = srcImage + y * imageWidth;
2101 for (
int i = x; i < imageWidth; i += get_local_size(0)) {
2103 float weight = 1.0f;
/* Samples before column i: handled eight at a time with weight + k, indices
   below zero reflected by mirrorBottom(). */
2106 while ((j + 7) < i) {
2107 for (
int k = 0; k < 8; ++k)
2108 sum += (weight + k) * dot(RGB, convert_float4(src[mirrorBottom(j+k)]));
2113 sum += weight * dot(RGB, convert_float4(src[mirrorBottom(j)]));
/* Samples up to column radius + i: handled eight at a time with weight - k,
   indices at or past imageWidth reflected by mirrorTop(). */
2118 while ((j + 7) < radius + i) {
2119 for (
int k = 0; k < 8; ++k)
2120 sum += (weight - k) * dot(RGB, convert_float4(src[mirrorTop(j + k, imageWidth)]));
2124 while (j < radius + i) {
2125 sum += weight * dot(RGB, convert_float4(src[mirrorTop(j, imageWidth)]));
2130 tmpImage[i + y * imageWidth] = sum / ((radius + 1) * (radius + 1));
/*
  LocalContrastBlurApplyColumn kernel: vertical pass over `blurImage` followed
  by the contrast adjustment of one pixel.

  The work-item handles pixel (x, y) taken from its two global ids. It forms a
  weighted sum down column x of `blurImage` (row stride imageWidth), divides
  it by (radius + 1)^2, and uses the difference between the source value
  dot(RGB, pixel) and that blurred value, scaled by strength / 100, to derive
  a multiplier applied to the x, y and z components of the source pixel. The
  w component is left as read from srcImage.

  NOTE(review): `radius`, `sum` and `j` are used but their declarations, and
  the updates of `weight` and `j`, are not visible in this excerpt.
*/
2136 __kernel
void LocalContrastBlurApplyColumn(__global CLPixelType *srcImage, __global CLPixelType *dstImage, __global
float *blurImage,
2138 const float strength,
2139 const int imageWidth,
2140 const int imageHeight)
2142 const float4 RGB = (float4)(0.2126f, 0.7152f, 0.0722f, 0.0f);
2144 int x = get_global_id(0);
2145 int y = get_global_id(1);
2147 if ((x >= imageWidth) || (y >= imageHeight))
/* src points at the top of column x; consecutive rows are imageWidth apart. */
2150 global
float *src = blurImage + x;
2153 float weight = 1.0f;
/* Rows before y: weight + k, negative rows reflected by mirrorBottom(). */
2156 while ((j + 7) < y) {
2157 for (
int k = 0; k < 8; ++k)
2158 sum += (weight + k) * src[mirrorBottom(j+k) * imageWidth];
2163 sum += weight * src[mirrorBottom(j) * imageWidth];
/* Rows up to radius + y: weight - k, rows at or past imageHeight reflected
   by mirrorTop(). */
2168 while ((j + 7) < radius + y) {
2169 for (
int k = 0; k < 8; ++k)
2170 sum += (weight - k) * src[mirrorTop(j + k, imageHeight) * imageWidth];
2174 while (j < radius + y) {
2175 sum += weight * src[mirrorTop(j, imageHeight) * imageWidth];
2180 CLPixelType pixel = srcImage[x + y * imageWidth];
2181 float srcVal = dot(RGB, convert_float4(pixel));
2182 float mult = (srcVal - (sum / ((radius + 1) * (radius + 1)))) * (strength / 100.0f);
/* NOTE(review): srcVal is the divisor here and is 0 when the weighted x/y/z
   components are all zero; no guard is visible -- confirm this is handled. */
2183 mult = (srcVal + mult) / srcVal;
2185 pixel.x = ClampToQuantum(pixel.x * mult);
2186 pixel.y = ClampToQuantum(pixel.y * mult);
2187 pixel.z = ClampToQuantum(pixel.z * mult);
2189 dstImage[x + y * imageWidth] = pixel;
/*
  Convert a red/green/blue triple to hue, saturation and lightness, written
  through the three output pointers.

  The components are scaled by QuantumScale before use. Lightness is the mean
  of the largest and smallest scaled component. The hue expression depends on
  which component equals the maximum, and the saturation expression on
  whether lightness is at most 0.5.

  NOTE(review): `c`, `tmax` and `tmin` are declared outside the visible lines;
  presumably c is tmax - tmin -- confirm. Any final scaling of *hue and the
  handling of c == 0 are likewise not visible here.
*/
2207 static inline void ConvertRGBToHSL(
const CLQuantum red,
const CLQuantum green,
const CLQuantum blue,
2208 float *hue,
float *saturation,
float *lightness)
2218 tmax=MagickMax(QuantumScale*red,MagickMax(QuantumScale*green, QuantumScale*blue));
2219 tmin=MagickMin(QuantumScale*red,MagickMin(QuantumScale*green, QuantumScale*blue));
2223 *lightness=(tmax+tmin)/2.0;
/* Red is the maximum: hue comes from the green-blue difference. */
2231 if (tmax == (QuantumScale*red))
2233 *hue=(QuantumScale*green-QuantumScale*blue)/c;
2234 if ((QuantumScale*green) < (QuantumScale*blue))
/* Green is the maximum: offset 2; the remaining expression uses offset 4. */
2238 if (tmax == (QuantumScale*green))
2239 *hue=2.0+(QuantumScale*blue-QuantumScale*red)/c;
2241 *hue=4.0+(QuantumScale*red-QuantumScale*green)/c;
2244 if (*lightness <= 0.5)
2245 *saturation=c/(2.0*(*lightness));
2247 *saturation=c/(2.0-2.0*(*lightness));
/*
  Convert hue, saturation and lightness to a red/green/blue triple, written
  through the three output pointers after scaling by QuantumRange and
  clamping with ClampToQuantum().

  The chroma-like term c is 2*lightness*saturation when lightness <= 0.5 and
  (2 - 2*lightness)*saturation otherwise; tmin is lightness - c/2.

  NOTE(review): the declarations of h, c, x, tmin, r, g and b, the derivation
  of h from `hue`, and the body of the switch that assigns r, g and b are not
  visible in this excerpt.
*/
2250 static inline void ConvertHSLToRGB(
const float hue,
const float saturation,
const float lightness,
2251 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2266 if (lightness <= 0.5)
2267 c=2.0*lightness*saturation;
2269 c=(2.0-2.0*lightness)*saturation;
2270 tmin=lightness-0.5*c;
/* Wrap h by subtracting whole multiples of 360. */
2271 h-=360.0*floor(h/360.0);
2273 x=c*(1.0-fabs(h-2.0*floor(h/2.0)-1.0));
/* Select one of six cases from the integer part of h. */
2274 switch ((
int) floor(h) % 6)
2325 *red=ClampToQuantum(QuantumRange*r);
2326 *green=ClampToQuantum(QuantumRange*g);
2327 *blue=ClampToQuantum(QuantumRange*b);
/*
  Adjust a color in HSL space, in place.

  The red/green/blue values behind the pointers are converted with
  ConvertRGBToHSL(), the hue is shifted by 0.5*(0.01*percent_hue - 1.0),
  saturation and lightness are multiplied by their percentage / 100, and the
  result is converted back with ConvertHSLToRGB() into the same pointers.
  A percentage of 100 therefore leaves the corresponding term unchanged.

  NOTE(review): the declarations of hue, saturation and lightness, and
  anything between the hue shift and the saturation scaling, are not visible
  in this excerpt.
*/
2330 static inline void ModulateHSL(
const float percent_hue,
const float percent_saturation,
const float percent_lightness,
2331 CLQuantum *red,CLQuantum *green,CLQuantum *blue)
2341 ConvertRGBToHSL(*red,*green,*blue,&hue,&saturation,&lightness);
2342 hue+=0.5*(0.01*percent_hue-1.0);
2347 saturation*=0.01*percent_saturation;
2348 lightness*=0.01*percent_lightness;
2349 ConvertHSLToRGB(hue,saturation,lightness,red,green,blue);
/*
  Modulate kernel: adjusts one pixel of `im` in place via ModulateHSL().

  The pixel is addressed at c = x + y * get_global_size(0). Its color
  components are passed to ModulateHSL() with percent_brightness used as the
  lightness percentage, and the modified red, green and blue are written back
  together with the unchanged w component of the source pixel.

  NOTE(review): the declarations of red/green/blue and the load of red are not
  visible in this excerpt, nor is any use of `colorspace`.
*/
2352 __kernel
void Modulate(__global CLPixelType *im,
2353 const float percent_brightness,
2354 const float percent_hue,
2355 const float percent_saturation,
2356 const int colorspace)
2359 const int x = get_global_id(0);
2360 const int y = get_global_id(1);
2361 const int columns = get_global_size(0);
2362 const int c = x + y * columns;
2364 CLPixelType pixel = im[c];
2372 green=getGreen(pixel);
2373 blue=getBlue(pixel);
/* Argument order is hue, saturation, then brightness as the lightness term. */
2380 ModulateHSL(percent_hue, percent_saturation, percent_brightness,
2381 &red, &green, &blue);
2386 CLPixelType filteredPixel;
2388 setRed(&filteredPixel, red);
2389 setGreen(&filteredPixel, green);
2390 setBlue(&filteredPixel, blue);
/* Keep the w component of the source pixel as is. */
2391 filteredPixel.w = pixel.w;
2393 im[c] = filteredPixel;
/*
  MotionBlur: writes one output pixel as a weighted sum of `width` input
  samples taken along a list of offsets.

  For tap i the sample position is currentPixel + offset[i], clamped to the
  image with ClampToCanvas(), and its weight is filter[i]. The accumulator
  starts from `bias`.

  Two paths are visible:
    - (channel & OpacityChannel) == 0 or matte == 0: all four components are
      accumulated as filter[i] * sample.
    - otherwise: x, y and z are additionally weighted by
      alpha = QuantumScale * (QuantumRange - sample.w) and afterwards scaled
      by PerceptibleReciprocal(gamma).

  NOTE(review): the declarations of currentPixel, pixel, bias and gamma, and
  the accumulation of gamma, are not visible in this excerpt.
*/
2411 void MotionBlur(
const __global CLPixelType *input, __global CLPixelType *output,
2412 const unsigned int imageWidth,
const unsigned int imageHeight,
2413 const __global
float *filter,
const unsigned int width,
const __global int2* offset,
2415 const ChannelType channel,
const unsigned int matte) {
2418 currentPixel.x = get_global_id(0);
2419 currentPixel.y = get_global_id(1);
/* Positions outside the image are tested here. */
2421 if (currentPixel.x >= imageWidth
2422 || currentPixel.y >= imageHeight)
2426 pixel.x = (float)bias.x;
2427 pixel.y = (
float)bias.y;
2428 pixel.z = (float)bias.z;
2429 pixel.w = (
float)bias.w;
/* Path without alpha weighting. */
2431 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2433 for (
int i = 0; i < width; i++) {
2436 int2 samplePixel = currentPixel + offset[i];
2437 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2438 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2439 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2441 pixel.x += (filter[i] * (float)samplePixelValue.x);
2442 pixel.y += (filter[i] * (float)samplePixelValue.y);
2443 pixel.z += (filter[i] * (float)samplePixelValue.z);
2444 pixel.w += (filter[i] * (float)samplePixelValue.w);
2447 CLPixelType outputPixel;
2448 outputPixel.x = ClampToQuantum(pixel.x);
2449 outputPixel.y = ClampToQuantum(pixel.y);
2450 outputPixel.z = ClampToQuantum(pixel.z);
2451 outputPixel.w = ClampToQuantum(pixel.w);
2452 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
/* Alpha-weighted path. */
2457 for (
int i = 0; i < width; i++) {
2460 int2 samplePixel = currentPixel + offset[i];
2461 samplePixel.x = ClampToCanvas(samplePixel.x, imageWidth);
2462 samplePixel.y = ClampToCanvas(samplePixel.y, imageHeight);
2464 CLPixelType samplePixelValue = input[ samplePixel.y * imageWidth + samplePixel.x];
2466 float alpha = QuantumScale*(QuantumRange-samplePixelValue.w);
2467 float k = filter[i];
2468 pixel.x = pixel.x + k * alpha * samplePixelValue.x;
2469 pixel.y = pixel.y + k * alpha * samplePixelValue.y;
2470 pixel.z = pixel.z + k * alpha * samplePixelValue.z;
/* NOTE(review): the w term is also multiplied by alpha here, whereas the
   non-alpha path above uses filter[i] * w only -- confirm this is intended. */
2472 pixel.w += k * alpha * samplePixelValue.w;
2476 gamma = PerceptibleReciprocal(gamma);
2477 pixel.xyz = gamma*pixel.xyz;
2479 CLPixelType outputPixel;
2480 outputPixel.x = ClampToQuantum(pixel.x);
2481 outputPixel.y = ClampToQuantum(pixel.y);
2482 outputPixel.z = ClampToQuantum(pixel.z);
2483 outputPixel.w = ClampToQuantum(pixel.w);
2484 output[currentPixel.y * imageWidth + currentPixel.x] = outputPixel;
/*
  RadialBlur kernel: writes one output pixel as a sum of input samples taken
  at positions rotated around blurCenter.

  The image size is taken from the global work size (columns from dimension
  0, rows from dimension 1). For each used table entry i the offset from the
  centre, (center_x, center_y), is rotated with cos_theta[i] / sin_theta[i],
  0.5 is added before the coordinates are passed to ClampToCanvas(), and the
  pixel at that position is read. Table entries are visited with stride
  `step`, which is derived from blur_radius / radius and limited to
  cossin_theta_size - 1.

  Two paths are visible:
    - (channel & OpacityChannel) == 0 or matte == 0: plain sum of the four
      components, scaled by PerceptibleReciprocal(normalize).
    - otherwise: x, y and z are weighted by
      alpha = QuantumScale * (QuantumRange - p.w) and scaled by
      PerceptibleReciprocal(gamma); w is scaled by
      PerceptibleReciprocal(normalize).

  NOTE(review): the declarations of result, bias and gamma, the accumulation
  of normalize, gamma and result.w, and any lower bound applied to `step` are
  not visible in this excerpt.
*/
2502 __kernel
void RadialBlur(
const __global CLPixelType *im, __global CLPixelType *filtered_im,
2504 const unsigned int channel,
const unsigned int matte,
2505 const float2 blurCenter,
2506 __constant
float *cos_theta, __constant
float *sin_theta,
2507 const unsigned int cossin_theta_size)
2509 const int x = get_global_id(0);
2510 const int y = get_global_id(1);
2511 const int columns = get_global_size(0);
2512 const int rows = get_global_size(1);
2513 unsigned int step = 1;
/* Offset of this pixel from the blur centre and its distance to it. */
2514 float center_x = (float) x - blurCenter.x;
2515 float center_y = (
float) y - blurCenter.y;
2516 float radius = hypot(center_x, center_y);
2519 float blur_radius = hypot(blurCenter.x, blurCenter.y);
/* The table stride grows as the pixel gets closer to the centre. */
2521 if (radius > MagickEpsilon)
2523 step = (
unsigned int) (blur_radius / radius);
2526 if (step >= cossin_theta_size)
2527 step = cossin_theta_size-1;
2531 result.x = (float)bias.x;
2532 result.y = (
float)bias.y;
2533 result.z = (float)bias.z;
2534 result.w = (
float)bias.w;
2535 float normalize = 0.0f;
/* Path without alpha weighting. */
2537 if (((channel & OpacityChannel) == 0) || (matte == 0)) {
2538 for (
unsigned int i=0; i<cossin_theta_size; i+=step)
2540 result += convert_float4(im[
2541 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
2542 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
2545 normalize = PerceptibleReciprocal(normalize);
2546 result = result * normalize;
/* Alpha-weighted path. */
2550 for (
unsigned int i=0; i<cossin_theta_size; i+=step)
2552 float4 p = convert_float4(im[
2553 ClampToCanvas(blurCenter.x+center_x*cos_theta[i]-center_y*sin_theta[i]+0.5f,columns)+
2554 ClampToCanvas(blurCenter.y+center_x*sin_theta[i]+center_y*cos_theta[i]+0.5f, rows)*columns]);
2556 float alpha = (float)(QuantumScale*(QuantumRange-p.w));
2557 result.x += alpha * p.x;
2558 result.y += alpha * p.y;
2559 result.z += alpha * p.z;
2564 gamma = PerceptibleReciprocal(gamma);
2565 normalize = PerceptibleReciprocal(normalize);
2566 result.x = gamma*result.x;
2567 result.y = gamma*result.y;
2568 result.z = gamma*result.z;
2569 result.w = normalize*result.w;
2571 filtered_im[y * columns + x] = (CLPixelType) (ClampToQuantum(result.x), ClampToQuantum(result.y),
2572 ClampToQuantum(result.z), ClampToQuantum(result.w));
/*
  Box weighting function for the resize kernels; takes the sample distance x
  and returns its weight.
  NOTE(review): only the signature is visible in this excerpt.
*/
2590 float BoxResizeFilter(
const float x)
/*
  Piecewise cubic weighting function driven by seven coefficients.

  Two polynomial pieces are visible:
    - coefficients [0..2]:  c0 + c1*x^2 + c2*x^3  (no linear term)
    - coefficients [3..6]:  c3 + c4*x + c5*x^2 + c6*x^3
  Both are written in nested (Horner) form.

  NOTE(review): the conditions selecting each piece, and the value returned
  outside them, are not visible in this excerpt.
*/
2598 float CubicBC(
const float x,
const __global
float* resizeFilterCoefficients)
2630 return(resizeFilterCoefficients[0]+x*(x*
2631 (resizeFilterCoefficients[1]+x*resizeFilterCoefficients[2])));
2633 return(resizeFilterCoefficients[3]+x*(resizeFilterCoefficients[4]+x*
2634 (resizeFilterCoefficients[5]+x*resizeFilterCoefficients[6])));
/*
  Sinc weighting function: returns sinpi(x) / (MagickPI * x).
  NOTE(review): the handling of x == 0, where the divisor is zero, is not
  visible in this excerpt -- presumably guarded by a surrounding test; confirm.
*/
2640 float Sinc(
const float x)
2644 const float alpha=(float) (MagickPI*x);
2645 return sinpi(x)/alpha;
/*
  Triangle weighting function: returns 1 - x for x below 1 and 0 otherwise.
  The expression does not take the absolute value of x itself.
*/
2652 float Triangle(
const float x)
2659 return ((x<1.0f)?(1.0f-x):0.0f);
/*
  Hanning window weighting function: 0.5 + 0.5 * cos(MagickPI * x).
*/
2665 float Hanning(
const float x)
2671 const float cosine=cos((MagickPI*x));
2672 return(0.5f+0.5f*cosine);
/*
  Hamming window weighting function: 0.54 + 0.46 * cos(MagickPI * x).
*/
2677 float Hamming(
const float x)
2683 const float cosine=cos((MagickPI*x));
2684 return(0.54f+0.46f*cosine);
/*
  Blackman window weighting function, written as a polynomial in
  cosine = cos(MagickPI * x):  0.34 + 0.5 * cosine + 0.16 * cosine^2.
*/
2689 float Blackman(
const float x)
2698 const float cosine=cos((MagickPI*x));
2699 return(0.34f+cosine*(0.5f+cosine*0.16f));
/*
  Dispatch on the weighting-function type and return the weight for x.

  Visible dispatch targets: CubicBCWeightingFunction calls CubicBC() with
  filterCoefficients, BoxWeightingFunction calls BoxResizeFilter(). The two
  Sinc labels share one case body.

  NOTE(review): the switch header, the returns for the Sinc, Triangle,
  Hanning, Hamming and Blackman labels, and any default are not visible in
  this excerpt.
*/
2707 static inline float applyResizeFilter(
const float x,
const ResizeWeightingFunctionType filterType,
const __global
float* filterCoefficients)
2713 case SincWeightingFunction:
2714 case SincFastWeightingFunction:
2716 case CubicBCWeightingFunction:
2717 return CubicBC(x,filterCoefficients);
2718 case BoxWeightingFunction:
2719 return BoxResizeFilter(x);
2720 case TriangleWeightingFunction:
2722 case HanningWeightingFunction:
2724 case HammingWeightingFunction:
2726 case BlackmanWeightingFunction:
/*
  Compute the resize weight for a sample at distance x.

  x is first divided by resizeFilterBlur and made non-negative (xBlur). The
  weight is the product of a window term `scale` and the filter function
  evaluated at xBlur. The window term is the window function evaluated at
  xBlur * resizeFilterScale, except when resizeWindowSupport is below
  MagickEpsilon or the window type is BoxWeightingFunction.

  NOTE(review): the declaration of `scale`, the value it takes in the
  box / tiny-support case, and the final return are not visible in this
  excerpt.
*/
2737 static inline float getResizeFilterWeight(
const __global
float* resizeFilterCubicCoefficients,
const ResizeWeightingFunctionType resizeFilterType
2738 ,
const ResizeWeightingFunctionType resizeWindowType
2739 ,
const float resizeFilterScale,
const float resizeWindowSupport,
const float resizeFilterBlur,
const float x)
2742 float xBlur = fabs(x/resizeFilterBlur);
2743 if (resizeWindowSupport < MagickEpsilon
2744 || resizeWindowType == BoxWeightingFunction)
2750 scale = resizeFilterScale;
2751 scale = applyResizeFilter(xBlur*scale, resizeWindowType, resizeFilterCubicCoefficients);
2753 float weight = scale * applyResizeFilter(xBlur, resizeFilterType, resizeFilterCubicCoefficients);
2760 const char* accelerateKernels2 =
/*
  Number of work-items that share one output pixel: the integer quotient of
  the work-group size by the number of pixels the group computes.
  pixelPerWorkgroup must be non-zero.
*/
static inline unsigned int getNumWorkItemsPerPixel(const unsigned int pixelPerWorkgroup,
  const unsigned int numWorkItems)
{
  const unsigned int itemsPerPixel = numWorkItems / pixelPerWorkgroup;
  return itemsPerPixel;
}
/*
  Map a work-item id to the index of the pixel it contributes to.

  Work-items are grouped in consecutive runs of
  numWorkItems / pixelPerWorkgroup items per pixel (the same quotient that
  getNumWorkItemsPerPixel() yields). Items left over once every pixel has its
  run get -1, meaning they have nothing to compute.
*/
static inline int pixelToCompute(const unsigned itemID,
  const unsigned int pixelPerWorkgroup, const unsigned int numWorkItems)
{
  const unsigned int itemsPerPixel = numWorkItems / pixelPerWorkgroup;
  const int candidate = itemID / itemsPerPixel;

  if (candidate < pixelPerWorkgroup)
    return candidate;
  return -1;
}
/*
  ResizeHorizontalFilter kernel: horizontal resampling pass. The work-group
  size is fixed to 256 x 1 x 1 by the kernel attribute.

  Work distribution, as far as the visible code shows:
    - A work-group handles output columns [startX, stopX) of row
      y = get_global_id(1), where startX = get_group_id(0) * pixelPerWorkgroup.
    - The input pixels of that row needed by the group are first copied into
      the local buffer inputImageCache.
    - The group's output pixels are processed in chunks of pixelChunkSize.
      Within a chunk, numItems work-items cooperate on one output pixel: each
      accumulates a slice of the filter taps, and the partial sums are then
      combined in the local buffers outputPixelCache / densityCache /
      gammaCache.
    - The first actualNumPixelInThisChunk work-items normalize and write the
      finished pixels to filteredImage.

  NOTE(review): several lines are not visible in this excerpt, among them the
  declaration and accumulation of `gamma`, the accumulation of `density`, the
  test that chooses between the plain and the alpha-weighted accumulation
  loop (presumably on `matte`), and the test that chooses between the two
  final write-out variants.
*/
2780 __kernel __attribute__((reqd_work_group_size(256, 1, 1)))
2781 void ResizeHorizontalFilter(
const __global CLPixelType* inputImage,
const unsigned int inputColumns,
const unsigned int inputRows,
const unsigned int matte
2782 ,
const float xFactor, __global CLPixelType* filteredImage,
const unsigned int filteredColumns,
const unsigned int filteredRows
2783 ,
const int resizeFilterType,
const int resizeWindowType
2784 ,
const __global
float* resizeFilterCubicCoefficients
2785 ,
const float resizeFilterScale,
const float resizeFilterSupport,
const float resizeFilterWindowSupport,
const float resizeFilterBlur
2786 , __local CLPixelType* inputImageCache,
const int numCachedPixels,
const unsigned int pixelPerWorkgroup,
const unsigned int pixelChunkSize
2787 , __local float4* outputPixelCache, __local
float* densityCache, __local
float* gammaCache) {
/* Output column range of this work-group, clipped to the output width. */
2791 const unsigned int startX = get_group_id(0)*pixelPerWorkgroup;
2792 const unsigned int stopX = MagickMin(startX + pixelPerWorkgroup,filteredColumns);
2793 const unsigned int actualNumPixelToCompute = stopX - startX;
/* scale is at least 1; support is at least 0.5; scale is then inverted. */
2796 float scale = MagickMax(1.0f/xFactor+MagickEpsilon ,1.0f);
2797 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2798 scale = PerceptibleReciprocal(scale);
/* Input column range held in the local cache, clipped to [0, inputColumns]. */
2800 const int cacheRangeStartX = MagickMax((
int)((startX+0.5f)/xFactor+MagickEpsilon-support+0.5f),(
int)(0));
2801 const int cacheRangeEndX = MagickMin((
int)(cacheRangeStartX + numCachedPixels), (
int)inputColumns);
/* Copy the cached span of input row y into local memory and wait for it. */
2804 const unsigned int y = get_global_id(1);
2805 event_t e = async_work_group_copy(inputImageCache,inputImage+y*inputColumns+cacheRangeStartX,cacheRangeEndX-cacheRangeStartX,0);
2806 wait_group_events(1,&e);
/* Number of chunks, rounded up. */
2808 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2809 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2812 const unsigned int chunkStartX = startX + chunk*pixelChunkSize;
2813 const unsigned int chunkStopX = MagickMin(chunkStartX + pixelChunkSize, stopX);
2814 const unsigned int actualNumPixelInThisChunk = chunkStopX - chunkStartX;
/* numItems work-items share one pixel; pixelIndex is -1 for idle items. */
2817 const unsigned int itemID = get_local_id(0);
2818 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(0));
2820 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(0));
2822 float4 filteredPixel = (float4)0.0f;
2823 float density = 0.0f;
2826 if (pixelIndex != -1) {
2829 const int x = chunkStartX + pixelIndex;
/* Centre of the output pixel in input coordinates, and the tap range
   [start, stop) around it, clipped to the input row. */
2832 const float bisect = (x+0.5)/xFactor+MagickEpsilon;
2833 const unsigned int start = (
unsigned int)MagickMax(bisect-support+0.5f,0.0f);
2834 const unsigned int stop = (
unsigned int)MagickMin(bisect+support+0.5f,(
float)inputColumns);
2835 const unsigned int n = stop - start;
/* Taps per work-item: n / numItems rounded up. */
2838 unsigned int numStepsPerWorkItem = n / numItems;
2839 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
/* This work-item takes taps [startStep, stopStep). */
2841 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
2842 if (startStep < n) {
2843 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
/* Position of the first tap relative to the start of the local cache. */
2845 unsigned int cacheIndex = start+startStep-cacheRangeStartX;
/* Plain accumulation: every component weighted by the filter weight. */
2848 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2849 float4 cp = convert_float4(inputImageCache[cacheIndex]);
2851 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2852 , (ResizeWeightingFunctionType)resizeWindowType
2853 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2855 filteredPixel += ((float4)weight)*cp;
/* Alpha-weighted accumulation: x, y and z use weight * alpha, w uses weight. */
2862 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
2863 CLPixelType p = inputImageCache[cacheIndex];
2865 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
2866 , (ResizeWeightingFunctionType)resizeWindowType
2867 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
2869 float alpha = weight * QuantumScale * GetPixelAlpha(p);
2870 float4 cp = convert_float4(p);
2872 filteredPixel.x += alpha * cp.x;
2873 filteredPixel.y += alpha * cp.y;
2874 filteredPixel.z += alpha * cp.z;
2875 filteredPixel.w += weight * cp.w;
/* Clear the per-pixel accumulators in local memory before combining. */
2885 if (itemID < actualNumPixelInThisChunk) {
2886 outputPixelCache[itemID] = (float4)0.0f;
2887 densityCache[itemID] = 0.0f;
2889 gammaCache[itemID] = 0.0f;
2891 barrier(CLK_LOCAL_MEM_FENCE);
/* Combine partial sums in numItems rounds: in round i only the work-items
   with itemID % numItems == i add their contribution. */
2894 for (
unsigned int i = 0; i < numItems; i++) {
2895 if (pixelIndex != -1) {
2896 if (itemID%numItems == i) {
2897 outputPixelCache[pixelIndex]+=filteredPixel;
2898 densityCache[pixelIndex]+=density;
2900 gammaCache[pixelIndex]+=gamma;
2904 barrier(CLK_LOCAL_MEM_FENCE);
/* One work-item per finished pixel normalizes and writes the result. */
2907 if (itemID < actualNumPixelInThisChunk) {
/* Write-out without gamma: scale by 1/density unless density is 0 or 1. */
2909 float density = densityCache[itemID];
2910 float4 filteredPixel = outputPixelCache[itemID];
2911 if (density!= 0.0f && density != 1.0)
2913 density = PerceptibleReciprocal(density);
2914 filteredPixel *= (float4)density;
2916 filteredImage[y*filteredColumns+chunkStartX+itemID] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
2917 , ClampToQuantum(filteredPixel.y)
2918 , ClampToQuantum(filteredPixel.z)
2919 , ClampToQuantum(filteredPixel.w));
/* Write-out with gamma: x, y and z are additionally scaled by 1/gamma. */
2922 float density = densityCache[itemID];
2923 float gamma = gammaCache[itemID];
2924 float4 filteredPixel = outputPixelCache[itemID];
2926 if (density!= 0.0f && density != 1.0) {
2927 density = PerceptibleReciprocal(density);
2928 filteredPixel *= (float4)density;
2931 gamma = PerceptibleReciprocal(gamma);
2934 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
2935 , ClampToQuantum(gamma*filteredPixel.y)
2936 , ClampToQuantum(gamma*filteredPixel.z)
2937 , ClampToQuantum(filteredPixel.w));
2939 filteredImage[y*filteredColumns+chunkStartX+itemID] = fp;
/*
  ResizeVerticalFilter kernel: vertical resampling pass. The work-group size
  is fixed to 1 x 256 x 1 by the kernel attribute.

  Work distribution, as far as the visible code shows:
    - A work-group handles output rows [startY, stopY) of column
      x = get_global_id(0), where startY = get_group_id(1) * pixelPerWorkgroup.
    - The input pixels of that column needed by the group are first gathered
      into the local buffer inputImageCache with a strided copy (stride
      inputColumns).
    - The group's output pixels are processed in chunks of pixelChunkSize.
      Within a chunk, numItems work-items cooperate on one output pixel: each
      accumulates a slice of the filter taps, and the partial sums are then
      combined in the local buffers outputPixelCache / densityCache /
      gammaCache.
    - The first actualNumPixelInThisChunk work-items normalize and write the
      finished pixels to filteredImage.

  NOTE(review): several lines are not visible in this excerpt, among them the
  declaration and accumulation of `gamma`, the accumulation of `density`, the
  test that chooses between the plain and the alpha-weighted accumulation
  loop (presumably on `matte`), and the test that chooses between the two
  final write-out variants.
*/
2950 __kernel __attribute__((reqd_work_group_size(1, 256, 1)))
2951 void ResizeVerticalFilter(
const __global CLPixelType* inputImage,
const unsigned int inputColumns,
const unsigned int inputRows,
const unsigned int matte
2952 ,
const float yFactor, __global CLPixelType* filteredImage,
const unsigned int filteredColumns,
const unsigned int filteredRows
2953 ,
const int resizeFilterType,
const int resizeWindowType
2954 ,
const __global
float* resizeFilterCubicCoefficients
2955 ,
const float resizeFilterScale,
const float resizeFilterSupport,
const float resizeFilterWindowSupport,
const float resizeFilterBlur
2956 , __local CLPixelType* inputImageCache,
const int numCachedPixels,
const unsigned int pixelPerWorkgroup,
const unsigned int pixelChunkSize
2957 , __local float4* outputPixelCache, __local
float* densityCache, __local
float* gammaCache) {
/* Output row range of this work-group, clipped to the output height. */
2961 const unsigned int startY = get_group_id(1)*pixelPerWorkgroup;
2962 const unsigned int stopY = MagickMin(startY + pixelPerWorkgroup,filteredRows);
2963 const unsigned int actualNumPixelToCompute = stopY - startY;
/* scale is at least 1; support is at least 0.5; scale is then inverted. */
2966 float scale = MagickMax(1.0f/yFactor+MagickEpsilon ,1.0f);
2967 const float support = MagickMax(scale*resizeFilterSupport,0.5f);
2968 scale = PerceptibleReciprocal(scale);
/* Input row range held in the local cache, clipped to [0, inputRows]. */
2970 const int cacheRangeStartY = MagickMax((
int)((startY+0.5f)/yFactor+MagickEpsilon-support+0.5f),(
int)(0));
2971 const int cacheRangeEndY = MagickMin((
int)(cacheRangeStartY + numCachedPixels), (
int)inputRows);
/* Gather the cached span of input column x (one pixel every inputColumns
   elements) into local memory and wait for it. */
2974 const unsigned int x = get_global_id(0);
2975 event_t e = async_work_group_strided_copy(inputImageCache, inputImage+cacheRangeStartY*inputColumns+x, cacheRangeEndY-cacheRangeStartY, inputColumns, 0);
2976 wait_group_events(1,&e);
/* Number of chunks, rounded up. */
2978 unsigned int totalNumChunks = (actualNumPixelToCompute+pixelChunkSize-1)/pixelChunkSize;
2979 for (
unsigned int chunk = 0; chunk < totalNumChunks; chunk++)
2982 const unsigned int chunkStartY = startY + chunk*pixelChunkSize;
2983 const unsigned int chunkStopY = MagickMin(chunkStartY + pixelChunkSize, stopY);
2984 const unsigned int actualNumPixelInThisChunk = chunkStopY - chunkStartY;
/* numItems work-items share one pixel; pixelIndex is -1 for idle items. */
2987 const unsigned int itemID = get_local_id(1);
2988 const unsigned int numItems = getNumWorkItemsPerPixel(actualNumPixelInThisChunk, get_local_size(1));
2990 const int pixelIndex = pixelToCompute(itemID, actualNumPixelInThisChunk, get_local_size(1));
2992 float4 filteredPixel = (float4)0.0f;
2993 float density = 0.0f;
2996 if (pixelIndex != -1) {
2999 const int y = chunkStartY + pixelIndex;
/* Centre of the output pixel in input coordinates, and the tap range
   [start, stop) around it, clipped to the input column. */
3002 const float bisect = (y+0.5)/yFactor+MagickEpsilon;
3003 const unsigned int start = (
unsigned int)MagickMax(bisect-support+0.5f,0.0f);
3004 const unsigned int stop = (
unsigned int)MagickMin(bisect+support+0.5f,(
float)inputRows);
3005 const unsigned int n = stop - start;
/* Taps per work-item: n / numItems rounded up. */
3008 unsigned int numStepsPerWorkItem = n / numItems;
3009 numStepsPerWorkItem += ((numItems*numStepsPerWorkItem)==n?0:1);
/* This work-item takes taps [startStep, stopStep). */
3011 const unsigned int startStep = (itemID%numItems)*numStepsPerWorkItem;
3012 if (startStep < n) {
3013 const unsigned int stopStep = MagickMin(startStep+numStepsPerWorkItem, n);
/* Position of the first tap relative to the start of the local cache. */
3015 unsigned int cacheIndex = start+startStep-cacheRangeStartY;
/* Plain accumulation: every component weighted by the filter weight. */
3018 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3019 float4 cp = convert_float4(inputImageCache[cacheIndex]);
3021 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3022 , (ResizeWeightingFunctionType)resizeWindowType
3023 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3025 filteredPixel += ((float4)weight)*cp;
/* Alpha-weighted accumulation: x, y and z use weight * alpha, w uses weight. */
3032 for (
unsigned int i = startStep; i < stopStep; i++,cacheIndex++) {
3033 CLPixelType p = inputImageCache[cacheIndex];
3035 float weight = getResizeFilterWeight(resizeFilterCubicCoefficients,(ResizeWeightingFunctionType)resizeFilterType
3036 , (ResizeWeightingFunctionType)resizeWindowType
3037 , resizeFilterScale, resizeFilterWindowSupport, resizeFilterBlur,scale*(start+i-bisect+0.5));
3039 float alpha = weight * QuantumScale * GetPixelAlpha(p);
3040 float4 cp = convert_float4(p);
3042 filteredPixel.x += alpha * cp.x;
3043 filteredPixel.y += alpha * cp.y;
3044 filteredPixel.z += alpha * cp.z;
3045 filteredPixel.w += weight * cp.w;
/* Clear the per-pixel accumulators in local memory before combining. */
3055 if (itemID < actualNumPixelInThisChunk) {
3056 outputPixelCache[itemID] = (float4)0.0f;
3057 densityCache[itemID] = 0.0f;
3059 gammaCache[itemID] = 0.0f;
3061 barrier(CLK_LOCAL_MEM_FENCE);
/* Combine partial sums in numItems rounds: in round i only the work-items
   with itemID % numItems == i add their contribution. */
3064 for (
unsigned int i = 0; i < numItems; i++) {
3065 if (pixelIndex != -1) {
3066 if (itemID%numItems == i) {
3067 outputPixelCache[pixelIndex]+=filteredPixel;
3068 densityCache[pixelIndex]+=density;
3070 gammaCache[pixelIndex]+=gamma;
3074 barrier(CLK_LOCAL_MEM_FENCE);
/* One work-item per finished pixel normalizes and writes the result. */
3077 if (itemID < actualNumPixelInThisChunk) {
/* Write-out without gamma: scale by 1/density unless density is 0 or 1. */
3079 float density = densityCache[itemID];
3080 float4 filteredPixel = outputPixelCache[itemID];
3081 if (density!= 0.0f && density != 1.0)
3083 density = PerceptibleReciprocal(density);
3084 filteredPixel *= (float4)density;
3086 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = (CLPixelType) (ClampToQuantum(filteredPixel.x)
3087 , ClampToQuantum(filteredPixel.y)
3088 , ClampToQuantum(filteredPixel.z)
3089 , ClampToQuantum(filteredPixel.w));
/* Write-out with gamma: x, y and z are additionally scaled by 1/gamma. */
3092 float density = densityCache[itemID];
3093 float gamma = gammaCache[itemID];
3094 float4 filteredPixel = outputPixelCache[itemID];
3096 if (density!= 0.0f && density != 1.0) {
3097 density = PerceptibleReciprocal(density);
3098 filteredPixel *= (float4)density;
3101 gamma = PerceptibleReciprocal(gamma);
3104 fp = (CLPixelType) ( ClampToQuantum(gamma*filteredPixel.x)
3105 , ClampToQuantum(gamma*filteredPixel.y)
3106 , ClampToQuantum(gamma*filteredPixel.z)
3107 , ClampToQuantum(filteredPixel.w));
3109 filteredImage[(chunkStartY+itemID)*filteredColumns+x] = fp;
/*
  UnsharpMaskBlurColumn kernel: vertical blur of already row-blurred data
  followed by the unsharp-mask combination with the original image.

  Steps visible in the code:
    1. The work-group loads the column segment it needs from blurRowData,
       extended by `radius` = (width - 1) / 2 rows on each side, into the
       local buffer cachedData. If the extended range lies inside the image
       a strided group copy is used; the other visible load clamps each row
       with ClampToCanvas() and is followed by a barrier.
    2. The filter taps are copied into the local buffer cachedFilter.
    3. For rows inside the image, the work-item convolves cachedFilter with
       cachedData starting at its local row id, in blocks of UFACTOR taps
       plus a remainder loop.
    4. The blurred value is clamped and floored, subtracted from the input
       pixel, and the difference scaled by `gain` is added back to the input
       pixel -- except for components where |2 * difference| is below
       QuantumRange * threshold, which keep the input value.

  NOTE(review): the declaration of `i`, the else branch structure around the
  two load variants, and the end of the UFACTOR preprocessor block are not
  visible in this excerpt. `channel` is not referenced in the visible lines.
*/
3131 __kernel
void UnsharpMaskBlurColumn(
const __global CLPixelType* inputImage,
3132 const __global float4 *blurRowData, __global CLPixelType *filtered_im,
3133 const unsigned int imageColumns,
const unsigned int imageRows,
3134 __local float4* cachedData, __local
float* cachedFilter,
3135 const ChannelType channel,
const __global
float *filter,
const unsigned int width,
3136 const float gain,
const float threshold)
3138 const unsigned int radius = (width-1)/2;
/* Row range needed by this work-group, widened by radius on both sides. */
3141 const int groupX = get_group_id(0);
3142 const int groupStartY = get_group_id(1)*get_local_size(1) - radius;
3143 const int groupStopY = (get_group_id(1)+1)*get_local_size(1) + radius;
/* Whole range inside the image: one strided group copy of the column. */
3145 if (groupStartY >= 0
3146 && groupStopY < imageRows) {
3147 event_t e = async_work_group_strided_copy(cachedData
3148 ,blurRowData+groupStartY*imageColumns+groupX
3149 ,groupStopY-groupStartY,imageColumns,0);
3150 wait_group_events(1,&e);
/* Per-item load with the row index clamped to the image. */
3153 for (
int i = get_local_id(1); i < (groupStopY - groupStartY); i+=get_local_size(1)) {
3154 cachedData[i] = blurRowData[ClampToCanvas(groupStartY+i,imageRows)*imageColumns+ groupX];
3156 barrier(CLK_LOCAL_MEM_FENCE);
/* Bring the filter taps into local memory. */
3159 event_t e = async_work_group_copy(cachedFilter,filter,width,0);
3160 wait_group_events(1,&e);
3164 const int cy = get_global_id(1);
3166 if (cy < imageRows) {
3167 float4 blurredPixel = (float4) 0.0f;
3171 \n #ifndef UFACTOR \n
3172 \n #define UFACTOR 8 \n
/* Main convolution loop in blocks of UFACTOR taps. */
3175 for ( ; i+UFACTOR < width; )
3177 \n #pragma unroll UFACTOR \n
3178 for (
int j=0; j < UFACTOR; j++, i++)
3180 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
/* Remaining taps. */
3184 for ( ; i < width; i++)
3186 blurredPixel+=cachedFilter[i]*cachedData[i+get_local_id(1)];
3189 blurredPixel = floor((float4)(ClampToQuantum(blurredPixel.x), ClampToQuantum(blurredPixel.y)
3190 ,ClampToQuantum(blurredPixel.z), ClampToQuantum(blurredPixel.w)));
/* Difference between the original pixel and its blurred version. */
3192 float4 inputImagePixel = convert_float4(inputImage[cy*imageColumns+groupX]);
3193 float4 outputPixel = inputImagePixel - blurredPixel;
3195 float quantumThreshold = QuantumRange*threshold;
/* Components whose doubled difference is below the threshold keep the
   input value; the others get input + difference * gain. */
3197 int4 mask = isless(fabs(2.0f*outputPixel), (float4)quantumThreshold);
3198 outputPixel = select(inputImagePixel + outputPixel * gain, inputImagePixel, mask);
3201 filtered_im[cy*imageColumns+groupX] = (CLPixelType) (ClampToQuantum(outputPixel.x), ClampToQuantum(outputPixel.y)
3202 ,ClampToQuantum(outputPixel.z), ClampToQuantum(outputPixel.w));
3211 __kernel
void UnsharpMask(__global CLPixelType *im, __global CLPixelType *filtered_im,
3212 __constant
float *filter,
3213 const unsigned int width,
3214 const unsigned int imageColumns,
const unsigned int imageRows,
3215 __local float4 *pixels,
3216 const float gain,
const float threshold,
const unsigned int justBlur)
3218 const int x = get_global_id(0);
3219 const int y = get_global_id(1);
3221 const unsigned int radius = (width - 1) / 2;
3223 int row = y - radius;
3224 int baseRow = get_group_id(1) * get_local_size(1) - radius;
3225 int endRow = (get_group_id(1) + 1) * get_local_size(1) + radius;
3227 while (row < endRow) {
3228 int srcy = (row < 0) ? -row : row;
3229 srcy = (srcy >= imageRows) ? (2 * imageRows - srcy - 1) : srcy;
3231 float4 value = 0.0f;
3233 int ix = x - radius;
3236 while (i + 7 < width) {
3237 for (
int j = 0; j < 8; ++j) {
3239 srcx = (srcx < 0) ? -srcx : srcx;
3240 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3241 value += filter[i + j] * convert_float4(im[srcx + srcy * imageColumns]);
3248 int srcx = (ix < 0) ? -ix : ix;
3249 srcx = (srcx >= imageColumns) ? (2 * imageColumns - srcx - 1) : srcx;
3250 value += filter[i] * convert_float4(im[srcx + srcy * imageColumns]);
3254 pixels[(row - baseRow) * get_local_size(0) + get_local_id(0)] = value;
3255 row += get_local_size(1);
3259 barrier(CLK_LOCAL_MEM_FENCE);
3262 const int px = get_local_id(0);
3263 const int py = get_local_id(1);
3264 const int prp = get_local_size(0);
3265 float4 value = (float4)(0.0f);
3268 while (i + 7 < width) {
3269 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3270 value += (float4)(filter[i]) * pixels[px + (py + i + 1) * prp];
3271 value += (float4)(filter[i]) * pixels[px + (py + i + 2) * prp];
3272 value += (float4)(filter[i]) * pixels[px + (py + i + 3) * prp];
3273 value += (float4)(filter[i]) * pixels[px + (py + i + 4) * prp];
3274 value += (float4)(filter[i]) * pixels[px + (py + i + 5) * prp];
3275 value += (float4)(filter[i]) * pixels[px + (py + i + 6) * prp];
3276 value += (float4)(filter[i]) * pixels[px + (py + i + 7) * prp];
3280 value += (float4)(filter[i]) * pixels[px + (py + i) * prp];
3283 if ((x < imageColumns) && (y < imageRows)) {
3284 if (justBlur == 0) {
3285 float4 srcPixel = convert_float4(im[x + y * imageColumns]);
3286 float4 diff = srcPixel - value;
3288 float quantumThreshold = QuantumRange*threshold;
3290 int4 mask = isless(fabs(2.0f * diff), (float4)quantumThreshold);
3291 value = select(srcPixel + diff * gain, srcPixel, mask);
3293 filtered_im[x + y * imageColumns] = (CLPixelType)(ClampToQuantum(value.s0), ClampToQuantum(value.s1), ClampToQuantum(value.s2), ClampToQuantum(value.s3));
/*
  WaveletDenoise: processes the image in 64x64 tiles with a fixed 64x4
  work-group.  For each of three channels the tile is filtered `passes` times
  with 0.25 / 0.5 / 0.25 taps spaced 1 << pass apart, first along x and then
  along y; per-pass differences are accumulated and written back to the
  s0..s2 components, and the interior of the tile is stored to dstImage.

  Each work-item owns one tile column (get_local_id(0)) and the 16 tile rows
  get_local_id(1), get_local_id(1) + 4, ...; slot i / 4 of the private arrays
  holds the value for tile row i.

  NOTE(review): `passes`, `tmp`, `accum` and `pixel` are used below but
  declared on lines not shown here -- confirm their types there.
*/
3299 __kernel __attribute__((reqd_work_group_size(64, 4, 1)))
void WaveletDenoise(__global CLPixelType *srcImage, __global CLPixelType *dstImage,
3300 const float threshold,
3302 const int imageWidth,
3303 const int imageHeight)
/* pad equals the tap distance of the last pass (1 << (passes - 1)). */
3305 const int pad = (1 << (passes - 1));
3306 const int tileSize = 64;
3307 const int tileRowPixels = 64;
/* Per-pass scale applied to `threshold`.  NOTE(review): only 8 entries are
   provided, so `pass` must stay below 8 -- confirm the host limits passes. */
3308 const float noise[] = { 0.8002, 0.2735, 0.1202, 0.0585, 0.0291, 0.0152, 0.0080, 0.0044 };
/* Private copy of the 16 pixels handled by this work-item. */
3310 CLPixelType stage[16];
/* One float per tile position; holds a single channel at a time. */
3312 local
float buffer[64 * 64];
/* Image coordinates of this work-item's column and of tile row 0.  Tiles
   advance by tileSize - 2 * pad and start pad pixels early, so neighbouring
   tiles overlap by pad on every side. */
3314 int srcx = (get_group_id(0) + get_global_offset(0) / tileSize) * (tileSize - 2 * pad) - pad + get_local_id(0);
3315 int srcy = (get_group_id(1) + get_global_offset(1) / 4) * (tileSize - 2 * pad) - pad;
/* Load the column into stage.  mirrorBottom / mirrorTop are defined elsewhere;
   presumably they reflect out-of-range coordinates into the image -- verify. */
3317 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3318 stage[i / 4] = srcImage[mirrorTop(mirrorBottom(srcx), imageWidth) + (mirrorTop(mirrorBottom(srcy + i) , imageHeight)) * imageWidth];
3322 for (
int channel = 0; channel < 3; ++channel) {
/* Copy one component of stage into buffer: s0, s1 or s2.
   NOTE(review): the statements selecting which copy runs for a given
   `channel` are not shown here -- confirm. */
3326 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3327 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s0);
3330 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3331 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s1);
3334 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3335 buffer[get_local_id(0) + i * tileRowPixels] = convert_float(stage[i / 4].s2);
3346 for (
int pass = 0; pass < passes; ++pass) {
/* Tap distance doubles with every pass. */
3347 const int radius = 1 << pass;
3348 const int x = get_local_id(0);
3349 const float thresh = threshold * noise[pass];
/* Zero all 16 accumulators.  accum[6] appears twice in the chain; the
   repetition has no effect.  NOTE(review): any condition guarding this
   statement is not shown here -- confirm. */
3352 accum[0] = accum[1] = accum[2] = accum[3] = accum[4] = accum[5] = accum[6] = accum[6] = accum[7] = accum[8] = accum[9] = accum[10] = accum[11] = accum[12] = accum[13] = accum[14] = accum[15] = 0.0f;
/* Horizontal filter: keep the current value in tmp, then replace it in
   buffer with 0.5 * centre + 0.25 * (left + right), neighbours taken `radius`
   columns away.  The barrier sits between the reads and the write.
   NOTE(review): 0.25 has no f suffix, unlike 0.5f. */
3357 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3358 const int offset = i * tileRowPixels;
3360 tmp[i / 4] = buffer[x + offset];
3361 pixel = 0.5f * tmp[i / 4] + 0.25 * (buffer[mirrorBottom(x - radius) + offset] + buffer[mirrorTop(x + radius, tileSize) + offset]);
3362 barrier(CLK_LOCAL_MEM_FENCE);
3363 buffer[x + offset] = pixel;
3365 barrier(CLK_LOCAL_MEM_FENCE);
/* Vertical filter on the horizontally filtered buffer, neighbours taken
   `radius` rows away.  delta is the saved value minus the filtered value.
   mirrorTop is given tileRowPixels here; it equals tileSize (both 64). */
3367 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3368 pixel = 0.5f * buffer[x + i * tileRowPixels] + 0.25 * (buffer[x + mirrorBottom(i - radius) * tileRowPixels] + buffer[x + mirrorTop(i + radius, tileRowPixels) * tileRowPixels]);
3369 float delta = tmp[i / 4] - pixel;
/* delta is compared against -thresh and +thresh before it is accumulated.
   NOTE(review): the branch bodies are not shown here; presumably they shrink
   delta toward zero (soft threshold) -- confirm. */
3371 if (delta < -thresh)
3373 else if (delta > thresh)
3377 accum[i / 4] += delta;
3380 barrier(CLK_LOCAL_MEM_FENCE);
/* Before every pass except the last, buffer is reloaded from tmp. */
3381 if (pass < passes - 1)
3382 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3383 buffer[x + i * tileRowPixels] = tmp[i / 4];
/* tmp is added to the accumulators.  NOTE(review): presumably this loop is
   the last-pass alternative to the reload above -- confirm. */
3385 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3386 accum[i / 4] += tmp[i / 4];
3387 barrier(CLK_LOCAL_MEM_FENCE);
/* Store the clamped accumulators back into a component of stage: s0, s1 or
   s2.  NOTE(review): as with the loads, the per-channel selection is not
   shown here. */
3392 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3393 stage[i / 4].s0 = ClampToQuantum(accum[i / 4]);
3396 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3397 stage[i / 4].s1 = ClampToQuantum(accum[i / 4]);
3400 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1))
3401 stage[i / 4].s2 = ClampToQuantum(accum[i / 4]);
3405 barrier(CLK_LOCAL_MEM_FENCE);
/* Write back only tile positions at least pad away from the tile border and
   inside the image. */
3410 if ((get_local_id(0) >= pad) && (get_local_id(0) < tileSize - pad) && (srcx >= 0) && (srcx < imageWidth)) {
3412 for (
int i = get_local_id(1); i < tileSize; i += get_local_size(1)) {
3413 if ((i >= pad) && (i < tileSize - pad) && (srcy + i >= 0) && (srcy + i < imageHeight)) {
3414 dstImage[srcx + (srcy + i) * imageWidth] = stage[i / 4];
3422 #endif // MAGICKCORE_OPENCL_SUPPORT
3424 #if defined(__cplusplus) || defined(c_plusplus)
3428 #endif // MAGICKCORE_ACCELERATE_KERNELS_PRIVATE_H