forked from NVIDIA/Q2RTX
-
Notifications
You must be signed in to change notification settings - Fork 1
/
checkerboard_interleave.comp
187 lines (147 loc) · 6.76 KB
/
checkerboard_interleave.comp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
/*
Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
/*
// ========================================================================== //
Q2RTX uses a novel rendering algorithm for reflections and refractions,
and for splitting the workload between two GPUs in an SLI configuration.
The frame is separated into two checkerboard fields, one with even pixels
and another with odd pixels, like this:
eoeoeoeo
oeoeoeoe
eoeoeoeo
oeoeoeoe
The pixels from the whole frame are then moved to either the left or the right
part of the frame (single-GPU case) or processed by separate GPUs (dual-GPU case).
Each half is a dense image containing only one checkerboard field, like so:
eeee oooo
eeee oooo
eeee oooo
eeee oooo
The benefits of separating the frame like this are:
1. In the dual-GPU case, each GPU has information about the entire image,
albeit a bit low-res. That makes it possible to run spatial filters,
in particular the denoisers, on each GPU separately, and then combine
the final colors after compositing. This helps us distribute a very
large part of the frame workload between the GPUs, and minimize
cross-GPU data transfers - compared to a case when we would transfer
all the lighting channels before denoising and run denoising in full
resolution on a single GPU.
2. We can split the reflection and refraction light paths between the fields.
For example, the even field traces only the reflection rays, and replaces
the parameters of the primary surface (normal, albedo) with the parameters
of the reflected surface. The odd field traces only the refraction rays
and does the same with surface parameters. As a result, the denoisers
on each field only see one "primary" surface - either reflected or
refracted, and that surface is continuous, so it can be efficiently
filtered across. Later, the checkerboard fields are interleaved and
slightly blurred to remove the visible checkerboard pattern (this shader).
To get a more complete sampling of the reflection and refraction light
paths, the assignment of fields to left and right half-frames (or GPUs)
is flipped on every frame. This also makes the reference mode converge
on a complete, non-checkerboarded image without spatial filtering.
This shader takes the input images (color and motion vectors) that contain
de-interleaved checkerboard fields in the left and right parts and interleaves
them into "flat" images. In the dual-GPU case, there is a cross-GPU copy happening
right before this shader is invoked, see function `vkpt_interleave`.
- Color is either passed through or blurred with a cross-shaped 3x3 filter,
depending on whether the material in that pixel is checkerboarded (water
or glass)
- Motion vectors are passed through and are necessary for temporal AA
// ========================================================================== //
*/
#version 460
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_nonuniform_qualifier : enable
#define GLOBAL_UBO_DESC_SET_IDX 0
#include "global_ubo.h"
#define GLOBAL_TEXTURES_DESC_SET_IDX 1
#include "global_textures.h"
#include "utils.glsl"
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
// Maps the current invocation (one per output pixel) to the pixel it reads
// from in the de-interleaved input image.
//
// The input stores one checkerboard field in the left half and the other in
// the right half, each half being global_ubo.width / 2 pixels wide.
//
// other_side_offset (out): signed horizontal distance from the returned
//     position to the same column in the opposite half
//     (+half width when reading from the left half, -half width otherwise).
// Returns the source position for this invocation.
ivec2 get_input_position(out int other_side_offset)
{
	int half_width = global_ubo.width / 2;
	uvec2 gid = gl_GlobalInvocationID.xy;

	// The pixel belongs to the "even" field when the low bits of x and y agree.
	bool parity_match = ((gid.x ^ gid.y) & 1u) == 0u;

	// pt_swap_checkerboard exchanges which field lives in which half.
	bool fields_swapped = global_ubo.pt_swap_checkerboard != 0;
	bool in_even_field = parity_match != fields_swapped;

	// Two neighboring output columns share one input column within a half.
	ivec2 pos = ivec2(int(gid.x >> 1), int(gid.y));

	if (in_even_field)
	{
		other_side_offset = half_width;
	}
	else
	{
		// The odd field is stored in the right half of the image.
		pos.x += half_width;
		other_side_offset = -half_width;
	}

	return pos;
}
// Loads a texel from IMG_ASVGF_COLOR, returning vec4(0) for any position
// outside the [0, width) x [0, height) frame rectangle.
//
// The lower bounds are checked as well as the upper ones: the neighbor taps
// issued by main() reach y = -1 on the first row and x = -1 for the first
// column of the left half, and those must be treated like every other
// off-frame tap instead of being passed on to imageLoad.
vec4 safe_load(ivec2 pos)
{
	bool outside_frame =
		pos.x < 0 || pos.y < 0 ||
		pos.x >= global_ubo.width || pos.y >= global_ubo.height;

	if (outside_frame)
		return vec4(0);

	return imageLoad(IMG_ASVGF_COLOR, pos);
}
// Returns the output pixel handled by this invocation: the global invocation
// ID's x and y, converted to signed integers.
ivec2 get_output_position()
{
	return ivec2(gl_GlobalInvocationID.xy);
}
// Entry point: writes one pixel of the interleaved ("flat") color and motion
// images from the de-interleaved checkerboard inputs.
void main()
{
	int other_side_offset;
	ivec2 src_pos = get_input_position(other_side_offset);
	ivec2 dst_pos = get_output_position();

	// Invocations past the frame extents only clear the color output.
	bool inside_frame = dst_pos.x < global_ubo.width && dst_pos.y < global_ubo.height;
	if (!inside_frame)
	{
		imageStore(IMG_FLAT_COLOR, dst_pos, vec4(0));
		return;
	}

	vec4 center = imageLoad(IMG_ASVGF_COLOR, src_pos);

	// Defaults: pass the color through and take the motion vector from this pixel's own field.
	vec4 result = center;
	ivec2 motion_src = src_pos;

	// The alpha channel carries the checkerboard flags. A pixel is filtered only when
	// filtering is enabled and more than one field bit is set; leaving all other pixels
	// untouched improves sharpness and reduces sparkles.
	int field_bits = int(center.a) & CHECKERBOARD_FLAG_FIELD_MASK;
	bool filter_pixel = (global_ubo.flt_enable != 0) && (bitCount(field_bits) > 1);

	if (filter_pixel)
	{
		// Same column in the opposite half, i.e. the other checkerboard field.
		ivec2 other_pos = src_pos + ivec2(other_side_offset, 0);
		int side_step = ((dst_pos.x & 1) != 0) ? 1 : -1;

		vec4 next_row = safe_load(other_pos + ivec2(0, 1));
		vec4 prev_row = safe_load(other_pos + ivec2(0, -1));
		vec4 same_px  = safe_load(other_pos);
		vec4 side_px  = safe_load(other_pos + ivec2(side_step, 0));

		// The first and last output columns drop the two same-row taps.
		// The average below still divides by four in that case.
		if (gl_GlobalInvocationID.x == 0 || gl_GlobalInvocationID.x == global_ubo.width - 1)
		{
			same_px = vec4(0);
			side_px = vec4(0);
		}

		vec4 neighbors = (next_row + prev_row + same_px + side_px) * 0.25;

		// Make sure that transparent surfaces carry the motion vector of the brighter
		// component (reflection or refraction), not a mix of both. Helps with TAA quality.
		if (luminance(neighbors.rgb) > luminance(center.rgb))
			motion_src = other_pos;

		// Blend the color only; result.a keeps the checkerboard flags from the center pixel.
		result.rgb = mix(center.rgb, neighbors.rgb, 0.5);
	}

	imageStore(IMG_FLAT_COLOR, dst_pos, result);
	imageStore(IMG_FLAT_MOTION, dst_pos, imageLoad(IMG_PT_MOTION, motion_src));
}