forked from NVIDIA/Q2RTX
-
Notifications
You must be signed in to change notification settings - Fork 0
/
asvgf_lf.comp
200 lines (155 loc) · 6.68 KB
/
asvgf_lf.comp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
/*
Copyright (C) 2018 Christoph Schied
Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
// ========================================================================== //
// This is the a-trous wavelet filter for the LF (indirect diffuse) channel.
// It's simpler than the similar filter for the HF channel because it does not
// use variance guidance or moments. Essentially, a simple bilateral blur that
// works with per-pixel spherical harmonics.
//
// See `asvgf.glsl` for general information about denoisers in Q2RTX.
// ========================================================================== //
#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_nonuniform_qualifier : enable
// Workgroup size: 16x16 invocations. main() uses gl_GlobalInvocationID.xy
// directly as the low-resolution pixel coordinate.
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
// Per-dispatch pass selector. main() maps iteration 0 to the deflicker pass
// and iterations 1..3 to the a-trous filter passes.
layout(push_constant, std140) uniform IterationInfo {
uint iteration;
} push;
// These macros are defined right before the headers that follow them;
// presumably the headers use them as descriptor set indices
// (confirm against global_ubo.h / global_textures.h).
#define GLOBAL_UBO_DESC_SET_IDX 0
#include "global_ubo.h"
#define GLOBAL_TEXTURES_DESC_SET_IDX 1
#include "global_textures.h"
#include "utils.glsl"
#include "asvgf.glsl"
#include "brdf.glsl"
// Filter kernel.
// The filter operates on a 1/3 resolution signal, where each 3x3 pixel part of the screen
// is represented by a single low-res pixel, whose depth and normal correspond to the
// high-res pixel in the center of that square (i.e. the anchor pixel).
// One a-trous pass over the low-resolution LF signal.
//
//   img_lf_shY, img_lf_CoCg - input SH textures for this pass (read via load_SH)
//   filtered_lf             - output: filtered SH for this invocation's low-res pixel
//
// If push.iteration is at or beyond global_ubo.flt_atrous_lf, the center pixel
// is passed through unfiltered. Otherwise a sparse 3x3 neighborhood, spaced by
// 2^(iteration - 1) low-res pixels, is blended using depth, kernel and
// (optionally) geometric normal weights.
void
filter_image(
	sampler2D img_lf_shY,
	sampler2D img_lf_CoCg,
	out SH filtered_lf)
{
	ivec2 ipos_lowres = ivec2(gl_GlobalInvocationID);
	ivec2 ipos_hires = ipos_lowres * GRAD_DWN + ivec2(1);

	// Load the color of the target low-res pixel
	SH color_center_lf = load_SH(img_lf_shY, img_lf_CoCg, ipos_lowres);

	// This pass is disabled by the settings: pass the input through.
	if(global_ubo.flt_atrous_lf <= push.iteration)
	{
		filtered_lf = color_center_lf;
		return;
	}

	// Load the parameters of the anchor pixel
	vec3 geo_normal_center = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, ipos_hires, 0).x);
	float depth_center = texelFetch(TEX_PT_VIEW_DEPTH_A, ipos_hires, 0).x;
	float fwidth_depth = texelFetch(TEX_PT_MOTION, ipos_hires, 0).w;

	// Tap spacing in low-res pixels. main() only calls this function for
	// iterations 1..3, so the shift amount is never negative.
	const int step_size = int(1u << (push.iteration - 1));

	// Tap spacing expressed in high-res pixels, used to normalize the depth term.
	float step_hires = float(step_size * GRAD_DWN);

	// The center pixel always contributes with weight 1.
	SH sum_color_lf = color_center_lf;
	float sum_w_lf = 1.0;

	// Boundaries for the checkerboard field, either left or right half of the screen
	int field_left = 0;
	int field_right = global_ubo.width / 2;
	if(ipos_hires.x >= field_right)
	{
		field_left = field_right;
		field_right = global_ubo.width;
	}

	// Compute the weighted average of the SH color from a sparse 3x3 pattern around the target pixel
	const int r = 1;
	for(int yy = -r; yy <= r; yy++) {
		for(int xx = -r; xx <= r; xx++) {
			if(xx == 0 && yy == 0)
				continue;

			ivec2 p_lowres = ipos_lowres + ivec2(xx, yy) * step_size;
			ivec2 p_hires = p_lowres * GRAD_DWN + ivec2(1);

			// Skip taps that fall outside the current field *before* touching any
			// texture. Such taps had zero weight anyway, but fetching texels at
			// out-of-range coordinates yields undefined values, and multiplying
			// an undefined NaN/Inf by zero would still corrupt the sums.
			bool inside_field =
				all(greaterThanEqual(p_hires, ivec2(field_left, 0))) &&
				all(lessThan(p_hires, ivec2(field_right, global_ubo.height)));
			if(!inside_field)
				continue;

			// Use geometric normals here so that we can blur over larger areas.
			// The lighting detail will be partially preserved by spherical harmonics.
			vec3 geo_normal = decode_normal(texelFetch(TEX_PT_GEO_NORMAL_A, p_hires, 0).x);
			float depth = texelFetch(TEX_PT_VIEW_DEPTH_A, p_hires, 0).x;

			// Depth similarity, relaxed proportionally to the tap distance.
			float dist_z = abs(depth_center - depth) * fwidth_depth * global_ubo.flt_atrous_depth;
			float w = exp(-dist_z / step_hires);

			w *= wavelet_kernel[abs(xx)][abs(yy)];

			float w_lf = w;

			if(global_ubo.flt_atrous_normal_lf > 0)
			{
				float GNdotGN = max(0.0, dot(geo_normal_center, geo_normal));
				w_lf *= pow(GNdotGN, global_ubo.flt_atrous_normal_lf);
			}

			SH c_lf = load_SH(img_lf_shY, img_lf_CoCg, p_lowres);

			// The 4th iteration has filter footprint big enough to step over obstacles and produce noticeable light leaking.
			// Prevent that by throwing away samples that are too bright. This also helps make some shadows a bit sharper.
			if(push.iteration == 3)
			{
				float lum_center = color_center_lf.shY.w;
				float lum_sample = c_lf.shY.w;

				// Avoid dividing by a zero center luminance: 0/0 would produce NaN.
				// The fallbacks match the limits of the regular expression:
				// a brighter sample is rejected, an equally dark one is kept.
				float leak_w = 1.0;
				if(lum_center != 0.0)
					leak_w = clamp(1.5 - lum_sample / lum_center * 0.25, 0, 1);
				else if(lum_sample > 0.0)
					leak_w = 0.0;

				w_lf *= leak_w;
			}

			accumulate_SH(sum_color_lf, c_lf, w_lf);
			sum_w_lf += w_lf;
		}
	}

	// sum_w_lf starts at 1.0 (the center tap), so the normalization is always safe.
	filtered_lf.shY = sum_color_lf.shY / sum_w_lf;
	filtered_lf.CoCg = sum_color_lf.CoCg / sum_w_lf;
}
// Deflicker pass: limits the brightness of the center low-res pixel relative
// to the average of its 8 immediate neighbors.
//
//   img_lf_shY, img_lf_CoCg - input SH textures (read via load_SH)
//   filtered_lf             - output: the center SH, scaled down if it was too bright
//
// The center's shY.w is compared against the neighbors' mean shY.w multiplied by
// global_ubo.flt_atrous_deflicker_lf; if it exceeds that limit, both shY and
// CoCg are scaled by the same ratio so that shY.w lands exactly on the limit.
void deflicker_image(
	sampler2D img_lf_shY,
	sampler2D img_lf_CoCg,
	out SH filtered_lf)
{
	ivec2 ipos_lowres = ivec2(gl_GlobalInvocationID);

	SH color_center_lf = load_SH(img_lf_shY, img_lf_CoCg, ipos_lowres);

	SH sum_color_lf = init_SH();

	const int r = 1;
	// Number of neighbors in the (2r+1)^2 window, excluding the center.
	const float num_pixels = square(r * 2 + 1) - 1;

	// Sum up the neighbors with equal weights.
	// NOTE(review): neighbor coordinates are not bounds-checked here, so pixels
	// on the image border read outside the image; the result then depends on
	// how load_SH treats such coordinates - confirm.
	for(int yy = -r; yy <= r; yy++) {
		for(int xx = -r; xx <= r; xx++) {
			if(xx == 0 && yy == 0)
				continue;

			ivec2 p_lowres = ipos_lowres + ivec2(xx, yy);

			SH c_lf = load_SH(img_lf_shY, img_lf_CoCg, p_lowres);

			accumulate_SH(sum_color_lf, c_lf, 1.0);
		}
	}

	// Brightness limit for the center pixel.
	float max_lum = sum_color_lf.shY.w * global_ubo.flt_atrous_deflicker_lf / num_pixels;

	if(color_center_lf.shY.w > max_lum)
	{
		// Scale the whole SH so the relative SH/chroma proportions are kept.
		float ratio = max_lum / color_center_lf.shY.w;
		color_center_lf.shY *= ratio;
		color_center_lf.CoCg *= ratio;
	}

	filtered_lf = color_center_lf;
}
// Entry point: one invocation per low-resolution pixel.
//
// push.iteration selects the pass and the ping/pong direction:
//   0: deflicker, PING -> PONG
//   1: a-trous,   PONG -> PING
//   2: a-trous,   PING -> PONG
//   3: a-trous,   PONG -> PING
// Any other iteration value produces no output.
void
main()
{
	ivec2 ipos = ivec2(gl_GlobalInvocationID);

	// Drop invocations whose 3x3 block would start outside the rendered area.
	ivec2 hires_extent = ivec2(global_ubo.current_gpu_slice_width, global_ubo.height);
	if(any(greaterThanEqual(ipos * GRAD_DWN, hires_extent)))
		return;

	SH filtered_lf;

	switch(push.iteration) {
	case 0:
		deflicker_image(TEX_ASVGF_ATROUS_PING_LF_SH, TEX_ASVGF_ATROUS_PING_LF_COCG, filtered_lf);
		STORE_SH(IMG_ASVGF_ATROUS_PONG_LF_SH, IMG_ASVGF_ATROUS_PONG_LF_COCG, ipos, filtered_lf);
		break;

	case 1:
		filter_image(TEX_ASVGF_ATROUS_PONG_LF_SH, TEX_ASVGF_ATROUS_PONG_LF_COCG, filtered_lf);
		STORE_SH(IMG_ASVGF_ATROUS_PING_LF_SH, IMG_ASVGF_ATROUS_PING_LF_COCG, ipos, filtered_lf);
		break;

	case 2:
		filter_image(TEX_ASVGF_ATROUS_PING_LF_SH, TEX_ASVGF_ATROUS_PING_LF_COCG, filtered_lf);
		STORE_SH(IMG_ASVGF_ATROUS_PONG_LF_SH, IMG_ASVGF_ATROUS_PONG_LF_COCG, ipos, filtered_lf);
		break;

	case 3:
		filter_image(TEX_ASVGF_ATROUS_PONG_LF_SH, TEX_ASVGF_ATROUS_PONG_LF_COCG, filtered_lf);
		STORE_SH(IMG_ASVGF_ATROUS_PING_LF_SH, IMG_ASVGF_ATROUS_PING_LF_COCG, ipos, filtered_lf);
		break;
	}
}