aboutsummaryrefslogtreecommitdiff
path: root/shaders/cl/kernel_bi_filter.cl
blob: 4023c12089652ecdbe377175dfc4642519f4685f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
/*
 * function:     kernel_bi_filter
 *               bilateral filter
 * input_y:      Y channel image2d_t as read only
 * input_dark:   dark channel image2d_t as read only
 * output_dark:  dark channel image2d_t as write only
 *
 * data_type      CL_UNSIGNED_INT16
 * channel_order  CL_RGBA
 */

#define PATCH_RADIUS 7
#define PATCH_DIAMETER (2 * PATCH_RADIUS + 1)

#define CALC_SUM(y1,y2,y3,dark1,dark2,dark3) \
    cur_y = (float8)(y1, y2, y3); \
    cur_dark = (float8)(dark1, dark2, dark3); \
    calc_sum (cur_y, cur_dark, center_y, &weight_sum, &data_sum);

__inline void calc_sum (float8 cur_y, float8 cur_dark, float8 center_y, float8 *weight_sum, float8 *data_sum)
{
    float8 delta = (cur_y - center_y) / 28.0f;
    delta = -0.5f * delta * delta;

    float8 weight = native_exp(delta);
    float8 data = cur_dark * weight;
    (*weight_sum) += weight;
    (*data_sum) += data;
}

__kernel void kernel_bi_filter (
    __read_only image2d_t input_y,
    __read_only image2d_t input_dark,
    __write_only image2d_t output_dark)
{
    int pos_x = get_global_id (0);
    int pos_y = get_global_id (1);
    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;

    float8 y1, y2, dark1, dark2;
    float8 cur_y, cur_dark;

    float8 weight_sum = 0.0f;
    float8 data_sum = 0.0f;
    float8 center_y = convert_float8(as_uchar8(convert_ushort4(read_imageui(input_y, sampler, (int2)(pos_x, pos_y)))));
    for (int i = 0; i < PATCH_DIAMETER; i++) {
        y1 = convert_float8(as_uchar8(convert_ushort4(read_imageui(input_y, sampler, (int2)(pos_x - 1, pos_y - PATCH_RADIUS + i)))));
        y2 = convert_float8(as_uchar8(convert_ushort4(read_imageui(input_y, sampler, (int2)(pos_x, pos_y - PATCH_RADIUS + i)))));
        dark1 = convert_float8(as_uchar8(convert_ushort4(read_imageui(input_dark, sampler, (int2)(pos_x - 1, pos_y - PATCH_RADIUS + i)))));
        dark2 = convert_float8(as_uchar8(convert_ushort4(read_imageui(input_dark, sampler, (int2)(pos_x, pos_y - PATCH_RADIUS + i)))));
        CALC_SUM (y1.s1234, y1.s567, y2.s0, dark1.s1234, dark1.s567, dark2.s0);
        CALC_SUM (y1.s2345, y1.s67, y2.s01, dark1.s2345, dark1.s67, dark2.s01);
        CALC_SUM (y1.s3456, y1.s7, y2.s012, dark1.s3456, dark1.s7, dark2.s012);
        CALC_SUM (y1.s4567, y2.s01, y2.s23, dark1.s4567, dark2.s01, dark2.s23);
        CALC_SUM (y1.s567, y2.s0123, y2.s4, dark1.s567, dark2.s0123, dark2.s4);
        CALC_SUM (y1.s67, y2.s0123, y2.s45, dark1.s67, dark2.s0123, dark2.s45);
        CALC_SUM (y1.s7, y2.s0123, y2.s456, dark1.s7, dark2.s0123, dark2.s456);
        CALC_SUM (y2.s0123, y2.s45, y2.s67, dark2.s0123, dark2.s45, dark2.s67);

        y1 = convert_float8(as_uchar8(convert_ushort4(read_imageui(input_y, sampler, (int2)(pos_x + 1, pos_y - PATCH_RADIUS + i)))));
        dark1 = convert_float8(as_uchar8(convert_ushort4(read_imageui(input_dark, sampler, (int2)(pos_x + 1, pos_y - PATCH_RADIUS + i)))));
        CALC_SUM (y2.s1234, y2.s567, y1.s0, dark2.s1234, dark2.s567, dark1.s0);
        CALC_SUM (y2.s2345, y2.s67, y1.s01, dark2.s2345, dark2.s67, dark1.s01);
        CALC_SUM (y2.s3456, y2.s7, y1.s012, dark2.s3456, dark2.s7, dark1.s012);
        CALC_SUM (y2.s4567, y1.s01, y1.s23, dark2.s4567, dark1.s01, dark1.s23);
        CALC_SUM (y2.s567, y1.s0123, y1.s4, dark2.s567, dark1.s0123, dark1.s4);
        CALC_SUM (y2.s67, y1.s0123, y1.s45, dark2.s67, dark1.s0123, dark1.s45);
        CALC_SUM (y2.s7, y1.s0123, y1.s456, dark2.s7, dark1.s0123, dark1.s456);
    }

    float8 out_data = data_sum / weight_sum;
    write_imageui(output_dark, (int2)(pos_x, pos_y), convert_uint4(as_ushort4(convert_uchar8(out_data))));
}