aboutsummaryrefslogtreecommitdiff
path: root/cl_kernel/kernel_gauss.cl
blob: 4e66d483862ff8334ad4f633200997cc21da728a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
/*
 * function: kernel_gauss
 * input:    image2d_t as read only
 * output:   image2d_t as write only
 * workitem = 4x2 pixel output
 * GAUSS_RADIUS may be overridden in build options; it defaults to 2 otherwise.
 */

/* Filter radius, used as the pixel offset applied around each output position.
 * Can be supplied through the program build options (e.g. -DGAUSS_RADIUS=3);
 * falls back to 2 when it is not. */
#ifndef GAUSS_RADIUS
#define GAUSS_RADIUS 2
#endif

/* Side length of the square weight table (2 * radius + 1). kernel_gauss also
 * uses it as the row stride when indexing `table`. */
#define GAUSS_SCALE (2 * GAUSS_RADIUS + 1)

/*
 * kernel_gauss: convolve `input` with the GAUSS_SCALE x GAUSS_SCALE weight
 * matrix stored row-major in `table`.
 *
 * input:  sampled with unnormalized integer coordinates, clamp-to-edge
 *         addressing and nearest filtering; only the .x channel is read.
 * output: each work-item writes two float4 texels, at (x, 2*y) and
 *         (x, 2*y + 1). The four components of a texel carry the results for
 *         the four horizontally adjacent input columns 4*x .. 4*x + 3.
 * table:  at least GAUSS_SCALE * GAUSS_SCALE weights, row stride GAUSS_SCALE.
 *
 * Work-items whose tile lies outside the output image do nothing, and the
 * second row is skipped when it falls past the last output row. write_imagef
 * with out-of-range coordinates is undefined, which would otherwise be hit
 * when the global size is padded or the output height is odd.
 */
__kernel void kernel_gauss (__read_only image2d_t input, __write_only image2d_t output, __global float *table)
{
    int x = get_global_id (0);
    int y = get_global_id (1);
    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

    int out_width = get_image_width (output);
    int out_height = get_image_height (output);

    // Nothing to write for this work-item: skip all the image reads as well.
    if (x >= out_width || 2 * y >= out_height)
        return;

    int i, j;
    int index;
    float pixel;
    float4 out1 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
    float4 out2 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);

    // The 4x2 output tile needs a (GAUSS_SCALE + 3) x (GAUSS_SCALE + 1) input
    // window; every input pixel is fetched once and shared by all the outputs
    // it contributes to.
    for (i = 0; i < GAUSS_SCALE + 1; i++) {
        for (j = 0; j < GAUSS_SCALE + 3; j++) {
            pixel = read_imagef (input, sampler, (int2)(4 * x - GAUSS_RADIUS + j, 2 * y - GAUSS_RADIUS + i)).x;

            // First output row: window rows 0 .. GAUSS_SCALE - 1.
            // Output column k (x, y, z, w = 0..3) sees this pixel at table
            // column j - k; the conditions keep that column inside
            // [0, GAUSS_SCALE) so table is never read out of its row.
            if (i < GAUSS_SCALE) {
                index = i * GAUSS_SCALE + j;
                out1.x +=  (j < GAUSS_SCALE ? table[index] * pixel : 0.0f);
                out1.y += ((j < GAUSS_SCALE + 1) && j > 0 ? table[index - 1] * pixel : 0.0f);
                out1.z += ((j < GAUSS_SCALE + 2) && j > 1 ? table[index - 2] * pixel : 0.0f);
                out1.w += (j > 2 ? table[index - 3] * pixel : 0.0f);
            }

            // Second output row: window rows 1 .. GAUSS_SCALE, i.e. the same
            // pixel paired with the table row one above.
            if (i > 0) {
                index = (i - 1) * GAUSS_SCALE + j;
                out2.x +=  (j < GAUSS_SCALE ? table[index] * pixel : 0.0f);
                out2.y += ((j < GAUSS_SCALE + 1) && j > 0 ? table[index - 1] * pixel : 0.0f);
                out2.z += ((j < GAUSS_SCALE + 2) && j > 1 ? table[index - 2] * pixel : 0.0f);
                out2.w += (j > 2 ? table[index - 3] * pixel : 0.0f);
            }
        }
    }

    write_imagef (output, (int2)(x, 2 * y), out1);

    // With an odd output height the last tile has no second row.
    if (2 * y + 1 < out_height)
        write_imagef (output, (int2)(x, 2 * y + 1), out2);
}