Skip to content

Commit

Permalink
[GPU] Up to 7x7 resolution scaling
Browse files Browse the repository at this point in the history
  • Loading branch information
Triang3l committed Jul 17, 2022
1 parent e8652e5 commit 14fdf4b
Show file tree
Hide file tree
Showing 106 changed files with 38,595 additions and 37,754 deletions.
3 changes: 1 addition & 2 deletions src/xenia/gpu/d3d12/d3d12_render_target_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1342,8 +1342,7 @@ bool D3D12RenderTargetCache::Resolve(const Memory& memory,
}

// Nothing to copy/clear.
if (!resolve_info.coordinate_info.width_div_8 ||
!resolve_info.coordinate_info.height_div_8) {
if (!resolve_info.coordinate_info.width_div_8 || !resolve_info.height_div_8) {
return true;
}

Expand Down
5 changes: 4 additions & 1 deletion src/xenia/gpu/d3d12/d3d12_texture_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1840,10 +1840,13 @@ bool D3D12TextureCache::LoadTextureDataFromResidentMemoryImpl(Texture& texture,

auto& cbuffer_pool = command_processor_.GetConstantBufferPool();
LoadConstants load_constants;
// 3 bits for each.
assert_true(texture_resolution_scale_x <= 7);
assert_true(texture_resolution_scale_y <= 7);
load_constants.is_tiled_3d_endian_scale =
uint32_t(texture_key.tiled) | (uint32_t(is_3d) << 1) |
(uint32_t(texture_key.endianness) << 2) |
(texture_resolution_scale_x << 4) | (texture_resolution_scale_y << 6);
(texture_resolution_scale_x << 4) | (texture_resolution_scale_y << 7);

// The loop is slices within levels because the base and the levels may need
// different portions of the scaled resolve virtual address space to be
Expand Down
46 changes: 23 additions & 23 deletions src/xenia/gpu/draw_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@

// Very prominent in 545407F2.
DEFINE_bool(
resolve_resolution_scale_duplicate_second_pixel, true,
"When using resolution scale, apply the hack that duplicates the "
"right/lower host pixel in the left and top sides of render target resolve "
"areas to eliminate the gap caused by half-pixel offset (this is necessary "
"for certain games to display the scene graphics).",
resolve_resolution_scale_fill_half_pixel_offset, true,
"When using resolution scaling, apply the hack that stretches the first "
"surely covered host pixel in the left and top sides of render target "
"resolve areas to eliminate the gap caused by the half-pixel offset (this "
"is necessary for certain games to display the scene graphics).",
"GPU");

namespace xe {
Expand Down Expand Up @@ -696,7 +696,8 @@ xenos::CopySampleSelect SanitizeCopySampleSelect(

void GetResolveEdramTileSpan(ResolveEdramInfo edram_info,
ResolveCoordinateInfo coordinate_info,
uint32_t& base_out, uint32_t& row_length_used_out,
uint32_t height_div_8, uint32_t& base_out,
uint32_t& row_length_used_out,
uint32_t& rows_out) {
// Due to 64bpp, and also not to make an assumption that the offsets are
// limited to (80 - 8, 8 - 8) with 2x MSAA, and (40 - 8, 8 - 8) with 4x MSAA,
Expand All @@ -716,8 +717,7 @@ void GetResolveEdramTileSpan(ResolveEdramInfo edram_info,
uint32_t y0 = (coordinate_info.edram_offset_y_div_8 << y_scale_log2) /
xenos::kEdramTileHeightSamples;
uint32_t y1 =
(((coordinate_info.edram_offset_y_div_8 + coordinate_info.height_div_8)
<< y_scale_log2) +
(((coordinate_info.edram_offset_y_div_8 + height_div_8) << y_scale_log2) +
(xenos::kEdramTileHeightSamples - 1)) /
xenos::kEdramTileHeightSamples;
base_out = edram_info.base_tiles + y0 * edram_info.pitch_tiles + x0;
Expand All @@ -744,6 +744,11 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
bool fixed_rg16_truncated_to_minus_1_to_1,
bool fixed_rgba16_truncated_to_minus_1_to_1,
ResolveInfo& info_out) {
// Don't pass uninitialized values to shaders, so as not to leak data to frame
// captures. Also initialize the output to an empty resolve in case the resolve
// turns out to be invalid.
info_out.coordinate_info.packed = 0;
info_out.height_div_8 = 0;

auto rb_copy_control = regs.Get<reg::RB_COPY_CONTROL>();
info_out.rb_copy_control = rb_copy_control;

Expand All @@ -757,10 +762,6 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
return false;
}

// Don't pass uninitialized values to shaders, not to leak data to frame
// captures.
info_out.coordinate_info.packed = 0;

// Get the extent of pixels covered by the resolve rectangle, according to the
// top-left rasterization rule.
// D3D9 HACK: Vertices to use are always in vf0, and are written by the CPU.
Expand Down Expand Up @@ -876,11 +877,11 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,

info_out.coordinate_info.width_div_8 =
uint32_t(x1 - x0) >> xenos::kResolveAlignmentPixelsLog2;
info_out.coordinate_info.height_div_8 =
info_out.height_div_8 =
uint32_t(y1 - y0) >> xenos::kResolveAlignmentPixelsLog2;
// 2 bits for each.
assert_true(draw_resolution_scale_x <= 3);
assert_true(draw_resolution_scale_y <= 3);
// 3 bits for each.
assert_true(draw_resolution_scale_x <= 7);
assert_true(draw_resolution_scale_y <= 7);
info_out.coordinate_info.draw_resolution_scale_x = draw_resolution_scale_x;
info_out.coordinate_info.draw_resolution_scale_y = draw_resolution_scale_y;

Expand Down Expand Up @@ -1033,9 +1034,9 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
base_offset_y_tiles * surface_pitch_tiles + base_offset_x_tiles;

// Write the color/depth EDRAM info.
bool duplicate_second_pixel =
bool fill_half_pixel_offset =
(draw_resolution_scale_x > 1 || draw_resolution_scale_y > 1) &&
cvars::resolve_resolution_scale_duplicate_second_pixel &&
cvars::resolve_resolution_scale_fill_half_pixel_offset &&
cvars::half_pixel_offset && !regs.Get<reg::PA_SU_VTX_CNTL>().pix_center;
int32_t exp_bias = is_depth ? 0 : rb_copy_dest_info.copy_dest_exp_bias;
ResolveEdramInfo depth_edram_info;
Expand All @@ -1048,7 +1049,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
rb_depth_info.depth_base + edram_base_offset_tiles;
depth_edram_info.format = uint32_t(rb_depth_info.depth_format);
depth_edram_info.format_is_64bpp = 0;
depth_edram_info.duplicate_second_pixel = uint32_t(duplicate_second_pixel);
depth_edram_info.fill_half_pixel_offset = uint32_t(fill_half_pixel_offset);
info_out.depth_original_base = rb_depth_info.depth_base;
} else {
info_out.depth_original_base = 0;
Expand All @@ -1070,7 +1071,7 @@ bool GetResolveInfo(const RegisterFile& regs, const Memory& memory,
color_info.color_base + (edram_base_offset_tiles << is_64bpp);
color_edram_info.format = uint32_t(color_info.color_format);
color_edram_info.format_is_64bpp = is_64bpp;
color_edram_info.duplicate_second_pixel = uint32_t(duplicate_second_pixel);
color_edram_info.fill_half_pixel_offset = uint32_t(fill_half_pixel_offset);
if ((fixed_rg16_truncated_to_minus_1_to_1 &&
color_info.color_format == xenos::ColorRenderTargetFormat::k_16_16) ||
(fixed_rgba16_truncated_to_minus_1_to_1 &&
Expand Down Expand Up @@ -1173,9 +1174,8 @@ ResolveCopyShaderIndex ResolveInfo::GetCopyShader(
uint32_t width =
(coordinate_info.width_div_8 << xenos::kResolveAlignmentPixelsLog2) *
draw_resolution_scale_x;
uint32_t height =
(coordinate_info.height_div_8 << xenos::kResolveAlignmentPixelsLog2) *
draw_resolution_scale_y;
uint32_t height = (height_div_8 << xenos::kResolveAlignmentPixelsLog2) *
draw_resolution_scale_y;
const ResolveCopyShaderInfo& shader_info =
resolve_copy_shader_info[size_t(shader)];
group_count_x_out = (width + ((1 << shader_info.group_size_x_log2) - 1)) >>
Expand Down
28 changes: 16 additions & 12 deletions src/xenia/gpu/draw_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,9 +350,10 @@ union ResolveEdramInfo {
uint32_t base_tiles : xenos::kEdramBaseTilesBits;
uint32_t format : xenos::kRenderTargetFormatBits;
uint32_t format_is_64bpp : 1;
// Whether to take the value of column/row 1 for column/row 0, to reduce
// the impact of the half-pixel offset with resolution scaling.
uint32_t duplicate_second_pixel : 1;
// Whether to fill the half-pixel offset gap on the left and the top sides
// of the resolve region with the contents of the first surely covered
// column / row with resolution scaling.
uint32_t fill_half_pixel_offset : 1;
};
ResolveEdramInfo() : packed(0) { static_assert_size(*this, sizeof(packed)); }
};
Expand All @@ -371,12 +372,10 @@ union ResolveCoordinateInfo {
// totally broken way - in this case, the resolve must be dropped.
uint32_t width_div_8 : xenos::kResolveSizeBits -
xenos::kResolveAlignmentPixelsLog2;
uint32_t height_div_8 : xenos::kResolveSizeBits -
xenos::kResolveAlignmentPixelsLog2;

// 1 to 3.
uint32_t draw_resolution_scale_x : 2;
uint32_t draw_resolution_scale_y : 2;
// 1 to 7.
uint32_t draw_resolution_scale_x : 3;
uint32_t draw_resolution_scale_y : 3;
};
ResolveCoordinateInfo() : packed(0) {
static_assert_size(*this, sizeof(packed));
Expand All @@ -387,8 +386,8 @@ union ResolveCoordinateInfo {
// the area in tiles, but the pitch between rows is edram_info.pitch_tiles.
void GetResolveEdramTileSpan(ResolveEdramInfo edram_info,
ResolveCoordinateInfo coordinate_info,
uint32_t& base_out, uint32_t& row_length_used_out,
uint32_t& rows_out);
uint32_t height_div_8, uint32_t& base_out,
uint32_t& row_length_used_out, uint32_t& rows_out);

union ResolveCopyDestCoordinateInfo {
uint32_t packed;
Expand Down Expand Up @@ -496,6 +495,11 @@ struct ResolveInfo {
uint32_t color_original_base;

ResolveCoordinateInfo coordinate_info;
// Like coordinate_info.width_div_8, but stored outside the packed bitfield
// because shaders don't need it.
// Height of the resolve area in pixels, divided by 8.
// May be zero if the original rectangle was somehow specified in a totally
// broken way - in this case, the resolve must be dropped.
uint32_t height_div_8;

reg::RB_COPY_DEST_INFO copy_dest_info;
ResolveCopyDestCoordinateInfo copy_dest_coordinate_info;
Expand Down Expand Up @@ -525,7 +529,7 @@ struct ResolveInfo {
uint32_t& rows_out, uint32_t& pitch_out) const {
ResolveEdramInfo edram_info =
IsCopyingDepth() ? depth_edram_info : color_edram_info;
GetResolveEdramTileSpan(edram_info, coordinate_info, base_out,
GetResolveEdramTileSpan(edram_info, coordinate_info, height_div_8, base_out,
row_length_used_out, rows_out);
pitch_out = edram_info.pitch_tiles;
}
Expand Down Expand Up @@ -570,7 +574,7 @@ struct ResolveInfo {
uint32_t draw_resolution_scale_y) const {
// 8 guest MSAA samples per invocation.
uint32_t width_samples_div_8 = coordinate_info.width_div_8;
uint32_t height_samples_div_8 = coordinate_info.height_div_8;
uint32_t height_samples_div_8 = height_div_8;
xenos::MsaaSamples samples = IsCopyingDepth()
? depth_edram_info.msaa_samples
: color_edram_info.msaa_samples;
Expand Down
2 changes: 1 addition & 1 deletion src/xenia/gpu/render_target_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1077,7 +1077,7 @@ bool RenderTargetCache::PrepareHostRenderTargetsResolveClear(
std::min(uint32_t(resolve_info.coordinate_info.width_div_8) << 3,
pitch_pixels - clear_rectangle.x_pixels);
clear_rectangle.height_pixels =
std::min(uint32_t(resolve_info.coordinate_info.height_div_8) << 3,
std::min(uint32_t(resolve_info.height_div_8) << 3,
render_target_height_pixels - clear_rectangle.y_pixels);
if (!clear_rectangle.width_pixels || !clear_rectangle.height_pixels) {
// Outside the pitch / height (or initially specified as 0).
Expand Down
7 changes: 5 additions & 2 deletions src/xenia/gpu/render_target_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,8 @@ class RenderTargetCache {
uint32_t constant;
struct {
uint32_t pitch_tiles : xenos::kEdramPitchTilesBits;
uint32_t resolution_scale_x : 2;
uint32_t resolution_scale_y : 2;
uint32_t resolution_scale_x : 3;
uint32_t resolution_scale_y : 3;
// Whether 2x MSAA is supported natively rather than through 4x.
uint32_t msaa_2x_supported : 1;
};
Expand Down Expand Up @@ -513,6 +513,9 @@ class RenderTargetCache {
uint32_t pitch_tiles, bool msaa_2x_supported) const {
HostDepthStoreRenderTargetConstant constant;
constant.pitch_tiles = pitch_tiles;
// 3 bits for each.
assert_true(draw_resolution_scale_x() <= 7);
assert_true(draw_resolution_scale_y() <= 7);
constant.resolution_scale_x = draw_resolution_scale_x();
constant.resolution_scale_y = draw_resolution_scale_y();
constant.msaa_2x_supported = uint32_t(msaa_2x_supported);
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 14fdf4b

Please sign in to comment.