Skip to content

Commit

Permalink
[GPU] Treat non-adaptive-tessellated patches as 1-control-point
Browse files Browse the repository at this point in the history
  • Loading branch information
Triang3l committed Jul 24, 2022
1 parent 3c12814 commit 37579d3
Show file tree
Hide file tree
Showing 30 changed files with 4,223 additions and 388 deletions.
16 changes: 14 additions & 2 deletions src/xenia/gpu/d3d12/d3d12_command_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2428,13 +2428,25 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
switch (primitive_processing_result.host_primitive_type) {
// TODO(Triang3l): Support all primitive types.
case xenos::PrimitiveType::kTriangleList:
case xenos::PrimitiveType::kTrianglePatch:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST;
break;
case xenos::PrimitiveType::kQuadList:
case xenos::PrimitiveType::kQuadPatch:
primitive_topology = D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST;
break;
case xenos::PrimitiveType::kTrianglePatch:
primitive_topology =
(regs.Get<reg::VGT_HOS_CNTL>().tess_mode ==
xenos::TessellationMode::kAdaptive)
? D3D_PRIMITIVE_TOPOLOGY_3_CONTROL_POINT_PATCHLIST
: D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST;
break;
case xenos::PrimitiveType::kQuadPatch:
primitive_topology =
(regs.Get<reg::VGT_HOS_CNTL>().tess_mode ==
xenos::TessellationMode::kAdaptive)
? D3D_PRIMITIVE_TOPOLOGY_4_CONTROL_POINT_PATCHLIST
: D3D_PRIMITIVE_TOPOLOGY_1_CONTROL_POINT_PATCHLIST;
break;
default:
XELOGE(
"Host tessellated primitive type {} returned by the primitive "
Expand Down
46 changes: 34 additions & 12 deletions src/xenia/gpu/d3d12/pipeline_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,14 @@ namespace d3d12 {
namespace shaders {
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_quad_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/adaptive_triangle_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_1cp_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_quad_4cp_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_1cp_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/continuous_triangle_3cp_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_1cp_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_quad_4cp_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_1cp_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/discrete_triangle_3cp_hs.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/float24_round_ps.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/float24_truncate_ps.h"
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/tessellation_adaptive_vs.h"
Expand Down Expand Up @@ -2855,15 +2859,24 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
case xenos::TessellationMode::kDiscrete:
switch (host_vertex_shader_type) {
case Shader::HostVertexShaderType::kTriangleDomainCPIndexed:
state_desc.HS.pShaderBytecode = shaders::discrete_triangle_3cp_hs;
state_desc.HS.BytecodeLength =
sizeof(shaders::discrete_triangle_3cp_hs);
break;
case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed:
state_desc.HS.pShaderBytecode = shaders::discrete_triangle_hs;
state_desc.HS.pShaderBytecode = shaders::discrete_triangle_1cp_hs;
state_desc.HS.BytecodeLength =
sizeof(shaders::discrete_triangle_hs);
sizeof(shaders::discrete_triangle_1cp_hs);
break;
case Shader::HostVertexShaderType::kQuadDomainCPIndexed:
state_desc.HS.pShaderBytecode = shaders::discrete_quad_4cp_hs;
state_desc.HS.BytecodeLength =
sizeof(shaders::discrete_quad_4cp_hs);
break;
case Shader::HostVertexShaderType::kQuadDomainPatchIndexed:
state_desc.HS.pShaderBytecode = shaders::discrete_quad_hs;
state_desc.HS.BytecodeLength = sizeof(shaders::discrete_quad_hs);
state_desc.HS.pShaderBytecode = shaders::discrete_quad_1cp_hs;
state_desc.HS.BytecodeLength =
sizeof(shaders::discrete_quad_1cp_hs);
break;
default:
assert_unhandled_case(host_vertex_shader_type);
Expand All @@ -2873,15 +2886,24 @@ ID3D12PipelineState* PipelineCache::CreateD3D12Pipeline(
case xenos::TessellationMode::kContinuous:
switch (host_vertex_shader_type) {
case Shader::HostVertexShaderType::kTriangleDomainCPIndexed:
state_desc.HS.pShaderBytecode = shaders::continuous_triangle_3cp_hs;
state_desc.HS.BytecodeLength =
sizeof(shaders::continuous_triangle_3cp_hs);
break;
case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed:
state_desc.HS.pShaderBytecode = shaders::continuous_triangle_hs;
state_desc.HS.pShaderBytecode = shaders::continuous_triangle_1cp_hs;
state_desc.HS.BytecodeLength =
sizeof(shaders::continuous_triangle_hs);
sizeof(shaders::continuous_triangle_1cp_hs);
break;
case Shader::HostVertexShaderType::kQuadDomainCPIndexed:
state_desc.HS.pShaderBytecode = shaders::continuous_quad_4cp_hs;
state_desc.HS.BytecodeLength =
sizeof(shaders::continuous_quad_4cp_hs);
break;
case Shader::HostVertexShaderType::kQuadDomainPatchIndexed:
state_desc.HS.pShaderBytecode = shaders::continuous_quad_hs;
state_desc.HS.BytecodeLength = sizeof(shaders::continuous_quad_hs);
state_desc.HS.pShaderBytecode = shaders::continuous_quad_1cp_hs;
state_desc.HS.BytecodeLength =
sizeof(shaders::continuous_quad_1cp_hs);
break;
default:
assert_unhandled_case(host_vertex_shader_type);
Expand Down
83 changes: 26 additions & 57 deletions src/xenia/gpu/dxbc_shader_translator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,6 @@ void DxbcShaderTranslator::Reset() {
in_reg_ps_front_face_sample_index_ = UINT32_MAX;

in_domain_location_used_ = 0;
in_primitive_id_used_ = false;
in_control_point_index_used_ = false;
in_position_used_ = 0;
in_front_face_used_ = false;
Expand Down Expand Up @@ -523,18 +522,14 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
: dxbc::Dest::R(0, 0b0111),
dxbc::Src::VDomain(0b000110));
if (register_count() >= 2) {
// Remap and write the primitive index to r1.x as floating-point.
uint32_t primitive_id_temp =
uses_register_dynamic_addressing ? PushSystemTemp() : 1;
in_primitive_id_used_ = true;
RemapAndConvertVertexIndices(primitive_id_temp, 0b0001,
dxbc::Src::VPrim());
if (uses_register_dynamic_addressing) {
a_.OpMov(dxbc::Dest::X(0, 1, 0b0001),
dxbc::Src::R(primitive_id_temp, dxbc::Src::kXXXX));
// Release primitive_id_temp.
PopSystemTemp();
}
// Copy the patch index (already swapped and converted to float by the
// host vertex and hull shaders) to r1.x.
in_control_point_index_used_ = true;
a_.OpMov(uses_register_dynamic_addressing
? dxbc::Dest::X(0, 1, 0b0001)
: dxbc::Dest::R(1, 0b0001),
dxbc::Src::VICP(0, kInRegisterDSControlPointIndex,
dxbc::Src::kXXXX));
// Write the swizzle of the barycentric coordinates to r1.y. It
// appears that the tessellator offloads the reordering of coordinates
// for edges to game shaders.
Expand Down Expand Up @@ -604,19 +599,13 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() {
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0110)
: dxbc::Dest::R(0, 0b0110),
dxbc::Src::VDomain(0b010000));
// Remap and write the primitive index to r0.x as floating-point.
// 4D5307F1 ground quad patches use the primitive index offset.
uint32_t primitive_id_temp =
uses_register_dynamic_addressing ? PushSystemTemp() : 0;
in_primitive_id_used_ = true;
RemapAndConvertVertexIndices(primitive_id_temp, 0b0001,
dxbc::Src::VPrim());
if (uses_register_dynamic_addressing) {
a_.OpMov(dxbc::Dest::X(0, 0, 0b0001),
dxbc::Src::R(primitive_id_temp, dxbc::Src::kXXXX));
// Release primitive_id_temp.
PopSystemTemp();
}
// Copy the patch index (already swapped and converted to float by the
// host vertex and hull shaders) to r0.x.
in_control_point_index_used_ = true;
a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0001)
: dxbc::Dest::R(0, 0b0001),
dxbc::Src::VICP(0, kInRegisterDSControlPointIndex,
dxbc::Src::kXXXX));
if (register_count() >= 2) {
// Write the swizzle of the UV coordinates to r1.x. It appears that
// the tessellator offloads the reordering of coordinates for edges to
Expand Down Expand Up @@ -2763,10 +2752,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
} else if (IsDxbcDomainShader()) {
// Control point indices, byte-swapped, biased according to the base index
// and converted to float by the host vertex and hull shaders
// (XEVERTEXID). Needed even for patch-indexed tessellation modes because
// hull and domain shaders have strict linkage requirements, all hull shader
// outputs must be declared in a domain shader, and the same hull shaders
// are used for control-point-indexed and patch-indexed tessellation modes.
// (XEVERTEXID).
size_t control_point_index_position = shader_object_.size();
shader_object_.resize(shader_object_.size() + kParameterDwords);
++parameter_count;
Expand Down Expand Up @@ -3351,23 +3337,27 @@ void DxbcShaderTranslator::WriteShaderCode() {

Modification shader_modification = GetDxbcShaderModification();

uint32_t control_point_count = 1;
if (IsDxbcDomainShader()) {
// Not using control point data since Xenos only has a vertex shader acting
// as both vertex shader and domain shader.
uint32_t control_point_count = 3;
dxbc::TessellatorDomain tessellator_domain =
dxbc::TessellatorDomain::kTriangle;
switch (shader_modification.vertex.host_vertex_shader_type) {
case Shader::HostVertexShaderType::kTriangleDomainCPIndexed:
case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed:
control_point_count = 3;
tessellator_domain = dxbc::TessellatorDomain::kTriangle;
break;
case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed:
control_point_count = 1;
tessellator_domain = dxbc::TessellatorDomain::kTriangle;
break;
case Shader::HostVertexShaderType::kQuadDomainCPIndexed:
case Shader::HostVertexShaderType::kQuadDomainPatchIndexed:
control_point_count = 4;
tessellator_domain = dxbc::TessellatorDomain::kQuad;
break;
case Shader::HostVertexShaderType::kQuadDomainPatchIndexed:
control_point_count = 1;
tessellator_domain = dxbc::TessellatorDomain::kQuad;
break;
default:
// TODO(Triang3l): Support line patches.
assert_unhandled_case(
Expand Down Expand Up @@ -3543,30 +3533,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
// Domain location input.
ao_.OpDclInput(dxbc::Dest::VDomain(in_domain_location_used_));
}
if (in_primitive_id_used_) {
// Primitive (patch) index input.
ao_.OpDclInput(dxbc::Dest::VPrim());
}
if (in_control_point_index_used_) {
// Control point indices as float input.
uint32_t control_point_array_size = 3;
switch (shader_modification.vertex.host_vertex_shader_type) {
case Shader::HostVertexShaderType::kTriangleDomainCPIndexed:
control_point_array_size = 3;
break;
case Shader::HostVertexShaderType::kQuadDomainCPIndexed:
control_point_array_size = 4;
break;
default:
// TODO(Triang3l): Support line patches.
assert_unhandled_case(
shader_modification.vertex.host_vertex_shader_type);
EmitTranslationError(
"Unsupported host vertex shader type in "
"StartVertexOrDomainShader");
}
ao_.OpDclInput(dxbc::Dest::VICP(
control_point_array_size, kInRegisterDSControlPointIndex, 0b0001));
control_point_count, kInRegisterDSControlPointIndex, 0b0001));
}
} else {
if (register_count()) {
Expand Down
2 changes: 0 additions & 2 deletions src/xenia/gpu/dxbc_shader_translator.h
Original file line number Diff line number Diff line change
Expand Up @@ -1106,8 +1106,6 @@ class DxbcShaderTranslator : public ShaderTranslator {

// Mask of domain location actually used in the domain shader.
uint32_t in_domain_location_used_;
// Whether the primitive ID has been used in the domain shader.
bool in_primitive_id_used_;
// Whether kInRegisterDSControlPointIndex has been used in the shader.
bool in_control_point_index_used_;
// Mask of the pixel/sample position actually used in the pixel shader.
Expand Down
22 changes: 2 additions & 20 deletions src/xenia/gpu/primitive_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -222,8 +222,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
// Parse the primitive type and the tessellation state (VGT_OUTPUT_PATH_CNTL
// is only used in the explicit major mode) - there are cases in games when
// this register is left over after usage of tessellation in draws that don't
// need it. Also perform needed vertex count adjustments based on the
// primitive type.
// need it.
xenos::PrimitiveType guest_primitive_type = vgt_draw_initiator.prim_type;
xenos::PrimitiveType host_primitive_type = guest_primitive_type;
bool tessellation_enabled =
Expand All @@ -234,7 +233,6 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
xenos::TessellationMode tessellation_mode =
regs.Get<reg::VGT_HOS_CNTL>().tess_mode;
Shader::HostVertexShaderType host_vertex_shader_type;
uint32_t guest_draw_vertex_count = vgt_draw_initiator.num_indices;
if (tessellation_enabled) {
// Currently only supporting tessellation in known cases for safety, and not
// yet converting patch strips / fans to patch lists until games using them
Expand Down Expand Up @@ -291,29 +289,12 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
// - 4D5307ED - water - adaptive.
host_vertex_shader_type =
Shader::HostVertexShaderType::kTriangleDomainPatchIndexed;
// See the comment about the rounding for kQuadPatch.
guest_draw_vertex_count =
xe::round_up(guest_draw_vertex_count, uint32_t(3), false);
break;
case xenos::PrimitiveType::kQuadPatch:
// - 4D5307F1 - ground - continuous.
// - 4D5307F2 - garden ground - adaptive.
host_vertex_shader_type =
Shader::HostVertexShaderType::kQuadDomainPatchIndexed;
// While it's known that num_indices represents the control point count
// (4D5307E6, for example, for water triangle patches, performs N
// invocations of the memexporting shader calculating the edge
// tessellation factors for one patch, and then draws the water with
// num_indices = 3 * N), 4D5307F1 ground is drawn with num_indices = 1
// rather than 4. Unlike Direct3D 11 tessellation, where the patch count
// is `floor(vertex count / control points per patch)`, on the Xenos,
// the count appears to be `ceil` of that value (like a
// `for (i = 0; i < num_indices; i += 4)` loop is used to emit the
// patches). It's unlikely, however, that this adjustment should also be
// done for regular primitive types with tessellation enabled, as
// they're handled as usual primitive topologies, just post-tessellated.
guest_draw_vertex_count =
xe::align(guest_draw_vertex_count, uint32_t(4));
break;
default:
// TODO(Triang3l): Support line patches.
Expand Down Expand Up @@ -365,6 +346,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
}

// Process the indices.
uint32_t guest_draw_vertex_count = vgt_draw_initiator.num_indices;
auto vgt_dma_size = regs.Get<reg::VGT_DMA_SIZE>();
if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA &&
guest_draw_vertex_count > vgt_dma_size.num_words) {
Expand Down
13 changes: 9 additions & 4 deletions src/xenia/gpu/shaders/adaptive_quad.hs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,17 @@ XeHSConstantDataOutput XePatchConstant(
[domain("quad")]
[partitioning("fractional_even")]
[outputtopology("triangle_cw")]
[outputcontrolpoints(4)]
[outputcontrolpoints(1)]
[patchconstantfunc("XePatchConstant")]
XeHSControlPointOutput main(
InputPatch<XeHSControlPointInputAdaptive, 4> xe_input_patch) {
InputPatch<XeHSControlPointInputAdaptive, 4> xe_input_patch,
uint xe_primitive_id : SV_PrimitiveID) {
XeHSControlPointOutput output;
// Not used with control point indices.
output.index = 0.0f;
// Only the lower 24 bits of the vertex index are used (tested on an Adreno
// 200 phone). `((index & 0xFFFFFF) + offset) & 0xFFFFFF` is the same as
// `(index + offset) & 0xFFFFFF`.
output.index =
float(clamp((xe_primitive_id + xe_vertex_index_offset) & 0xFFFFFFu,
xe_vertex_index_min_max.x, xe_vertex_index_min_max.y));
return output;
}
13 changes: 9 additions & 4 deletions src/xenia/gpu/shaders/adaptive_triangle.hs.hlsl
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,17 @@ XeHSConstantDataOutput XePatchConstant(
[domain("tri")]
[partitioning("fractional_even")]
[outputtopology("triangle_cw")]
[outputcontrolpoints(3)]
[outputcontrolpoints(1)]
[patchconstantfunc("XePatchConstant")]
XeHSControlPointOutput main(
InputPatch<XeHSControlPointInputAdaptive, 3> xe_input_patch) {
InputPatch<XeHSControlPointInputAdaptive, 3> xe_input_patch,
uint xe_primitive_id : SV_PrimitiveID) {
XeHSControlPointOutput output;
// Not used with control point indices.
output.index = 0.0f;
// Only the lower 24 bits of the vertex index are used (tested on an Adreno
// 200 phone). `((index & 0xFFFFFF) + offset) & 0xFFFFFF` is the same as
// `(index + offset) & 0xFFFFFF`.
output.index =
float(clamp((xe_primitive_id + xe_vertex_index_offset) & 0xFFFFFFu,
xe_vertex_index_min_max.x, xe_vertex_index_min_max.y));
return output;
}
Loading

0 comments on commit 37579d3

Please sign in to comment.