From fa87e45610d7a4dd3380d771695010a652de8da7 Mon Sep 17 00:00:00 2001
From: Jeff Johnson
Date: Tue, 29 Jan 2013 15:08:17 -0700
Subject: [PATCH] Kuwahara filter optimizations

---
 framework/Source/GPUImage.h                   |   4 +-
 framework/Source/GPUImageKuwaharaFilter.m     | 132 ++++----
 .../Source/GPUImageKuwaharaRadius3Filter.h    |   8 +
 .../Source/GPUImageKuwaharaRadius3Filter.m    | 286 ++++++++++++++++++
 4 files changed, 372 insertions(+), 58 deletions(-)
 create mode 100644 framework/Source/GPUImageKuwaharaRadius3Filter.h
 create mode 100644 framework/Source/GPUImageKuwaharaRadius3Filter.m

diff --git a/framework/Source/GPUImage.h b/framework/Source/GPUImage.h
index 11b7861b1..565168456 100755
--- a/framework/Source/GPUImage.h
+++ b/framework/Source/GPUImage.h
@@ -39,6 +39,7 @@
 #import "GPUImageMultiplyBlendFilter.h"
 #import "GPUImageDissolveBlendFilter.h"
 #import "GPUImageKuwaharaFilter.h"
+#import "GPUImageKuwaharaRadius3Filter.h"
 #import "GPUImageVignetteFilter.h"
 #import "GPUImageGaussianBlurFilter.h"
 #import "GPUImageGaussianBlurPositionFilter.h"
@@ -142,6 +143,3 @@
 #import "GPUImageParallelCoordinateLineTransformFilter.h"
 #import "GPUImageThresholdSketchFilter.h"
 #import "GPUImageLineGenerator.h"
-#import "GPUImageLinearBurnBlendFilter.h"
-#import "GPUImageGaussianBlurPositionFilter.h"
-#import "GPUImagePixellatePositionFilter.h"
diff --git a/framework/Source/GPUImageKuwaharaFilter.m b/framework/Source/GPUImageKuwaharaFilter.m
index e779c11c6..8c74e661a 100755
--- a/framework/Source/GPUImageKuwaharaFilter.m
+++ b/framework/Source/GPUImageKuwaharaFilter.m
@@ -15,64 +15,86 @@
 
  precision highp float;
 
- const vec2 src_size = vec2 (768.0, 1024.0);
+ const vec2 src_size = vec2 (1.0 / 768.0, 1.0 / 1024.0); // reciprocal of a hard-coded 768x1024 size; NOTE(review): presumably only correct for inputs of that size - confirm
 
  void main (void)
  {
-     vec2 uv = textureCoordinate;
-     float n = float((radius + 1) * (radius + 1));
-
-     vec3 m[4];
-     vec3 s[4];
-     for (int k = 0; k < 4; ++k) {
-         m[k] = vec3(0.0);
-         s[k] = vec3(0.0);
-     }
-
-     for (int j = -radius; j <= 0; ++j)  {
-         for (int i = -radius; i <= 0; ++i)  {
-             vec3 c = texture2D(inputImageTexture, uv + vec2(i,j) / src_size).rgb;
-             m[0] += c;
-             s[0] += c * c;
-         }
-     }
-
-     for (int j = -radius; j <= 0; ++j)  {
-         for (int i = 0; i <= radius; ++i)  {
-             vec3 c = texture2D(inputImageTexture, uv + vec2(i,j) / src_size).rgb;
-             m[1] += c;
-             s[1] += c * c;
-         }
-     }
-
-     for (int j = 0; j <= radius; ++j)  {
-         for (int i = 0; i <= radius; ++i)  {
-             vec3 c = texture2D(inputImageTexture, uv + vec2(i,j) / src_size).rgb;
-             m[2] += c;
-             s[2] += c * c;
-         }
-     }
-
-     for (int j = 0; j <= radius; ++j)  {
-         for (int i = -radius; i <= 0; ++i)  {
-             vec3 c = texture2D(inputImageTexture, uv + vec2(i,j) / src_size).rgb;
-             m[3] += c;
-             s[3] += c * c;
-         }
-     }
-
-
-     float min_sigma2 = 1e+2;
-     for (int k = 0; k < 4; ++k) {
-         m[k] /= n;
-         s[k] = abs(s[k] / n - m[k] * m[k]);
-
-         float sigma2 = s[k].r + s[k].g + s[k].b;
-         if (sigma2 < min_sigma2) {
-             min_sigma2 = sigma2;
-             gl_FragColor = vec4(m[k], 1.0);
-         }
-     }
+     vec2 uv = textureCoordinate;
+     float n = float((radius + 1) * (radius + 1)); // number of samples each region accumulates
+     int i; int j;
+     vec3 m0 = vec3(0.0); vec3 m1 = vec3(0.0); vec3 m2 = vec3(0.0); vec3 m3 = vec3(0.0); // per-region sums of sampled color
+     vec3 s0 = vec3(0.0); vec3 s1 = vec3(0.0); vec3 s2 = vec3(0.0); vec3 s3 = vec3(0.0); // per-region sums of squared sampled color
+     vec3 c; // current texture sample
+     // Region 0: i and j both run over [-radius, 0].
+     for (j = -radius; j <= 0; ++j)  {
+         for (i = -radius; i <= 0; ++i)  {
+             c = texture2D(inputImageTexture, uv + vec2(i,j) * src_size).rgb;
+             m0 += c;
+             s0 += c * c;
+         }
+     }
+     // Region 1: i runs over [0, radius], j over [-radius, 0].
+     for (j = -radius; j <= 0; ++j)  {
+         for (i = 0; i <= radius; ++i)  {
+             c = texture2D(inputImageTexture, uv + vec2(i,j) * src_size).rgb;
+             m1 += c;
+             s1 += c * c;
+         }
+     }
+     // Region 2: i and j both run over [0, radius].
+     for (j = 0; j <= radius; ++j)  {
+         for (i = 0; i <= radius; ++i)  {
+             c = texture2D(inputImageTexture, uv + vec2(i,j) * src_size).rgb;
+             m2 += c;
+             s2 += c * c;
+         }
+     }
+     // Region 3: i runs over [-radius, 0], j over [0, radius].
+     for (j = 0; j <= radius; ++j)  {
+         for (i = -radius; i <= 0; ++i)  {
+             c = texture2D(inputImageTexture, uv + vec2(i,j) * src_size).rgb;
+             m3 += c;
+             s3 += c * c;
+         }
+     }
+
+     // Turn each region's sums into a mean and a variance, then write the mean of the region with the smallest summed variance.
+     float min_sigma2 = 1e+2; // starting bound a region must beat to be selected
+     m0 /= n; // mean of region 0
+     s0 = abs(s0 / n - m0 * m0); // per-channel variance: mean of squares minus squared mean
+
+     float sigma2 = s0.r + s0.g + s0.b; // variance summed over the three channels
+     if (sigma2 < min_sigma2) { // strict comparison: on a tie the earlier region is kept
+         min_sigma2 = sigma2;
+         gl_FragColor = vec4(m0, 1.0); // region mean as output color, alpha fixed at 1.0
+     }
+
+     m1 /= n;
+     s1 = abs(s1 / n - m1 * m1);
+
+     sigma2 = s1.r + s1.g + s1.b;
+     if (sigma2 < min_sigma2) {
+         min_sigma2 = sigma2;
+         gl_FragColor = vec4(m1, 1.0);
+     }
+
+     m2 /= n;
+     s2 = abs(s2 / n - m2 * m2);
+
+     sigma2 = s2.r + s2.g + s2.b;
+     if (sigma2 < min_sigma2) {
+         min_sigma2 = sigma2;
+         gl_FragColor = vec4(m2, 1.0);
+     }
+
+     m3 /= n;
+     s3 = abs(s3 / n - m3 * m3);
+
+     sigma2 = s3.r + s3.g + s3.b;
+     if (sigma2 < min_sigma2) {
+         min_sigma2 = sigma2;
+         gl_FragColor = vec4(m3, 1.0);
+     }
  }
  );
 
diff --git a/framework/Source/GPUImageKuwaharaRadius3Filter.h b/framework/Source/GPUImageKuwaharaRadius3Filter.h
new file mode 100644
index 000000000..c4591b817
--- /dev/null
+++ b/framework/Source/GPUImageKuwaharaRadius3Filter.h
@@ -0,0 +1,8 @@
+//
+// GPUImageKuwaharaRadius3Filter.h
+
+#import "GPUImageFilter.h"
+/// GPUImageFilter subclass that declares no properties or methods of its own; its name indicates a Kuwahara filter with the radius fixed at 3.
+@interface GPUImageKuwaharaRadius3Filter : GPUImageFilter
+
+@end
diff --git a/framework/Source/GPUImageKuwaharaRadius3Filter.m b/framework/Source/GPUImageKuwaharaRadius3Filter.m
new file mode 100644
index 000000000..086154922
--- /dev/null
+++ b/framework/Source/GPUImageKuwaharaRadius3Filter.m
@@ -0,0 +1,286 @@
+#import "GPUImageKuwaharaRadius3Filter.h"
+
+// Sourced from Kyprianidis, J. E., Kang, H., and Doellner, J. "Anisotropic Kuwahara Filtering on the GPU," GPU Pro p.247 (2010).
+//
+// Original header:
+//
+// Anisotropic Kuwahara Filtering on the GPU
+// by Jan Eric Kyprianidis
+
+NSString *const kGPUImageKuwaharaRadius3FragmentShaderString = SHADER_STRING
+( // Fragment shader source: the four-region mean/variance selection written out sample by sample for offsets in [-3, 3], with no loops.
+ varying highp vec2 textureCoordinate;
+ uniform sampler2D inputImageTexture;
+
+ precision highp float;
+ // NOTE(review): src_size is the reciprocal of a hard-coded 768x1024 size; presumably only correct for inputs of that size - confirm.
+ const vec2 src_size = vec2 (1.0 / 768.0, 1.0 / 1024.0);
+
+ void main (void)
+ {
+     vec2 uv = textureCoordinate;
+     float n = float(16); // samples accumulated per region, (3 + 1) * (3 + 1); radius is assumed to be 3
+     vec3 m0 = vec3(0.0); vec3 m1 = vec3(0.0); vec3 m2 = vec3(0.0); vec3 m3 = vec3(0.0); // per-region sums of sampled color
+     vec3 s0 = vec3(0.0); vec3 s1 = vec3(0.0); vec3 s2 = vec3(0.0); vec3 s3 = vec3(0.0); // per-region sums of squared sampled color
+     vec3 c; // current texture sample
+     vec3 cSq; // c * c, computed once for samples that feed more than one region
+     // Offsets (-3..-1, -3..0): region 0; each (dx, 0) sample is also added to region 1.
+     c = texture2D(inputImageTexture, uv + vec2(-3,-3) * src_size).rgb;
+     m0 += c;
+     s0 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-3,-2) * src_size).rgb;
+     m0 += c;
+     s0 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-3,-1) * src_size).rgb;
+     m0 += c;
+     s0 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-3,0) * src_size).rgb;
+     cSq = c * c;
+     m0 += c;
+     s0 += cSq;
+     m1 += c;
+     s1 += cSq;
+
+     c = texture2D(inputImageTexture, uv + vec2(-2,-3) * src_size).rgb;
+     m0 += c;
+     s0 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-2,-2) * src_size).rgb;
+     m0 += c;
+     s0 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-2,-1) * src_size).rgb;
+     m0 += c;
+     s0 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-2,0) * src_size).rgb;
+     cSq = c * c;
+     m0 += c;
+     s0 += cSq;
+     m1 += c;
+     s1 += cSq;
+
+     c = texture2D(inputImageTexture, uv + vec2(-1,-3) * src_size).rgb;
+     m0 += c;
+     s0 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-1,-2) * src_size).rgb;
+     m0 += c;
+     s0 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-1,-1) * src_size).rgb;
+     m0 += c;
+     s0 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-1,0) * src_size).rgb;
+     cSq = c * c;
+     m0 += c;
+     s0 += cSq;
+     m1 += c;
+     s1 += cSq;
+     // Offsets (0, -3..-1): added to regions 0 and 3; the center sample (0, 0) is added to all four regions.
+     c = texture2D(inputImageTexture, uv + vec2(0,-3) * src_size).rgb;
+     cSq = c * c;
+     m0 += c;
+     s0 += cSq;
+     m3 += c;
+     s3 += cSq;
+     c = texture2D(inputImageTexture, uv + vec2(0,-2) * src_size).rgb;
+     cSq = c * c;
+     m0 += c;
+     s0 += cSq;
+     m3 += c;
+     s3 += cSq;
+     c = texture2D(inputImageTexture, uv + vec2(0,-1) * src_size).rgb;
+     cSq = c * c;
+     m0 += c;
+     s0 += cSq;
+     m3 += c;
+     s3 += cSq;
+     c = texture2D(inputImageTexture, uv + vec2(0,0) * src_size).rgb;
+     cSq = c * c;
+     m0 += c;
+     s0 += cSq;
+     m1 += c;
+     s1 += cSq;
+     m2 += c;
+     s2 += cSq;
+     m3 += c;
+     s3 += cSq;
+     // Offsets (-3..-1, 1..3): region 1 only.
+     c = texture2D(inputImageTexture, uv + vec2(-3,3) * src_size).rgb;
+     m1 += c;
+     s1 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-3,2) * src_size).rgb;
+     m1 += c;
+     s1 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-3,1) * src_size).rgb;
+     m1 += c;
+     s1 += c * c;
+
+     c = texture2D(inputImageTexture, uv + vec2(-2,3) * src_size).rgb;
+     m1 += c;
+     s1 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-2,2) * src_size).rgb;
+     m1 += c;
+     s1 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-2,1) * src_size).rgb;
+     m1 += c;
+     s1 += c * c;
+
+     c = texture2D(inputImageTexture, uv + vec2(-1,3) * src_size).rgb;
+     m1 += c;
+     s1 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-1,2) * src_size).rgb;
+     m1 += c;
+     s1 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(-1,1) * src_size).rgb;
+     m1 += c;
+     s1 += c * c;
+     // Offsets (0, 1..3): added to regions 1 and 2.
+     c = texture2D(inputImageTexture, uv + vec2(0,3) * src_size).rgb;
+     cSq = c * c;
+     m1 += c;
+     s1 += cSq;
+     m2 += c;
+     s2 += cSq;
+     c = texture2D(inputImageTexture, uv + vec2(0,2) * src_size).rgb;
+     cSq = c * c;
+     m1 += c;
+     s1 += cSq;
+     m2 += c;
+     s2 += cSq;
+     c = texture2D(inputImageTexture, uv + vec2(0,1) * src_size).rgb;
+     cSq = c * c;
+     m1 += c;
+     s1 += cSq;
+     m2 += c;
+     s2 += cSq;
+     // Offsets (1..3, 0..3): region 2; each (dx, 0) sample is also added to region 3.
+     c = texture2D(inputImageTexture, uv + vec2(3,3) * src_size).rgb;
+     m2 += c;
+     s2 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(3,2) * src_size).rgb;
+     m2 += c;
+     s2 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(3,1) * src_size).rgb;
+     m2 += c;
+     s2 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(3,0) * src_size).rgb;
+     cSq = c * c;
+     m2 += c;
+     s2 += cSq;
+     m3 += c;
+     s3 += cSq;
+
+     c = texture2D(inputImageTexture, uv + vec2(2,3) * src_size).rgb;
+     m2 += c;
+     s2 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(2,2) * src_size).rgb;
+     m2 += c;
+     s2 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(2,1) * src_size).rgb;
+     m2 += c;
+     s2 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(2,0) * src_size).rgb;
+     cSq = c * c;
+     m2 += c;
+     s2 += cSq;
+     m3 += c;
+     s3 += cSq;
+
+     c = texture2D(inputImageTexture, uv + vec2(1,3) * src_size).rgb;
+     m2 += c;
+     s2 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(1,2) * src_size).rgb;
+     m2 += c;
+     s2 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(1,1) * src_size).rgb;
+     m2 += c;
+     s2 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(1,0) * src_size).rgb;
+     cSq = c * c;
+     m2 += c;
+     s2 += cSq;
+     m3 += c;
+     s3 += cSq;
+     // Offsets (1..3, -3..-1): region 3 only.
+     c = texture2D(inputImageTexture, uv + vec2(3,-3) * src_size).rgb;
+     m3 += c;
+     s3 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(3,-2) * src_size).rgb;
+     m3 += c;
+     s3 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(3,-1) * src_size).rgb;
+     m3 += c;
+     s3 += c * c;
+
+     c = texture2D(inputImageTexture, uv + vec2(2,-3) * src_size).rgb;
+     m3 += c;
+     s3 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(2,-2) * src_size).rgb;
+     m3 += c;
+     s3 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(2,-1) * src_size).rgb;
+     m3 += c;
+     s3 += c * c;
+
+     c = texture2D(inputImageTexture, uv + vec2(1,-3) * src_size).rgb;
+     m3 += c;
+     s3 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(1,-2) * src_size).rgb;
+     m3 += c;
+     s3 += c * c;
+     c = texture2D(inputImageTexture, uv + vec2(1,-1) * src_size).rgb;
+     m3 += c;
+     s3 += c * c;
+     // Turn each region's sums into a mean and a variance, then write the mean of the region with the smallest summed variance.
+     float min_sigma2 = 1e+2; // starting bound a region must beat to be selected
+     m0 /= n; // mean of region 0
+     s0 = abs(s0 / n - m0 * m0); // per-channel variance: mean of squares minus squared mean
+
+     float sigma2 = s0.r + s0.g + s0.b; // variance summed over the three channels
+     if (sigma2 < min_sigma2) { // strict comparison: on a tie the earlier region is kept
+         min_sigma2 = sigma2;
+         gl_FragColor = vec4(m0, 1.0); // region mean as output color, alpha fixed at 1.0
+     }
+
+     m1 /= n;
+     s1 = abs(s1 / n - m1 * m1);
+
+     sigma2 = s1.r + s1.g + s1.b;
+     if (sigma2 < min_sigma2) {
+         min_sigma2 = sigma2;
+         gl_FragColor = vec4(m1, 1.0);
+     }
+
+     m2 /= n;
+     s2 = abs(s2 / n - m2 * m2);
+
+     sigma2 = s2.r + s2.g + s2.b;
+     if (sigma2 < min_sigma2) {
+         min_sigma2 = sigma2;
+         gl_FragColor = vec4(m2, 1.0);
+     }
+
+     m3 /= n;
+     s3 = abs(s3 / n - m3 * m3);
+
+     sigma2 = s3.r + s3.g + s3.b;
+     if (sigma2 < min_sigma2) {
+         min_sigma2 = sigma2;
+         gl_FragColor = vec4(m3, 1.0);
+     }
+ }
+ );
+
+@implementation GPUImageKuwaharaRadius3Filter
+
+#pragma mark -
+#pragma mark Initialization and teardown
+/// Passes kGPUImageKuwaharaRadius3FragmentShaderString to the superclass initializer; returns nil when that initializer returns nil, otherwise self.
+- (id)init; // NOTE(review): returns id and has a semicolon before the body - consider instancetype and dropping the semicolon if the codebase allows
+{
+    if (!(self = [super initWithFragmentShaderFromString:kGPUImageKuwaharaRadius3FragmentShaderString]))
+    {
+        return nil;
+    }
+
+    return self;
+}
+
+@end