Skip to content

Commit

Permalink
GLES: Implement vertex range culling.
Browse files Browse the repository at this point in the history
Based on tests, skips triangles with any point outside the 4096x4096 box,
except when depth clamping would engage.
  • Loading branch information
unknownbrackets committed Sep 17, 2018
1 parent 66e13f7 commit ab3a466
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 14 deletions.
5 changes: 3 additions & 2 deletions GPU/Common/ShaderCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,15 @@ enum : uint64_t {

DIRTY_BEZIERSPLINE = 1ULL << 32,
DIRTY_TEXCLAMP = 1ULL << 33,
DIRTY_CULLRANGE = 1ULL << 34,

DIRTY_DEPAL = 1ULL << 34,
DIRTY_DEPAL = 1ULL << 35,

// space for 5 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.

DIRTY_BONE_UNIFORMS = 0xFF000000ULL,

DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFULL,
DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFULL,
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,

// Other dirty elements that aren't uniforms!
Expand Down
38 changes: 38 additions & 0 deletions GPU/GLES/ShaderManagerGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
else
numBones = 0;
queries.push_back({ &u_depthRange, "u_depthRange" });
queries.push_back({ &u_cullRangeMin, "u_cullRangeMin" });
queries.push_back({ &u_cullRangeMax, "u_cullRangeMax" });

#ifdef USE_BONE_ARRAY
queries.push_back({ &u_bone, "u_bone" });
Expand Down Expand Up @@ -481,6 +483,42 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid) {
float data[4] = { viewZScale, viewZCenter, viewZCenter, viewZInvScale };
SetFloatUniform4(render_, &u_depthRange, data);
}
if (dirty & DIRTY_CULLRANGE) {
// Account for the projection viewport adjustment when viewport is too large.
auto reverseViewportX = [](float x) {
float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
return (pspViewport - gstate_c.vpXOffset) * (1.0f / gstate_c.vpWidthScale);
};
auto reverseViewportY = [](float y) {
float heightScale = gstate_c.vpHeightScale;
if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) {
// GL upside down is a pain as usual.
heightScale = -heightScale;
}
float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
return (pspViewport - gstate_c.vpYOffset) * (1.0f / heightScale);
};
auto reverseViewportZ = [](float z) {
float pspViewport = (z - gstate.getViewportZCenter()) * (1.0f / gstate.getViewportZScale());
return (pspViewport - gstate_c.vpZOffset) * (1.0f / gstate_c.vpDepthScale);
};
auto sortPair = [](float a, float b) {
return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
};

// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
// Any vertex outside this range (unless depth clamp enabled) is discarded.
auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
auto z = sortPair(reverseViewportZ(0.0f), reverseViewportZ(65535.5f));
// Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".
float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;

float minValues[4]{ x.first, y.first, z.first, clampEnable };
SetFloatUniform4(render_, &u_cullRangeMin, minValues);
float maxValues[4]{ x.second, y.second, z.second, NAN };
SetFloatUniform4(render_, &u_cullRangeMax, maxValues);
}

if (dirty & DIRTY_STENCILREPLACEVALUE) {
float f = (float)gstate.getStencilTestRef() * (1.0f / 255.0f);
Expand Down
2 changes: 2 additions & 0 deletions GPU/GLES/ShaderManagerGLES.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ class LinkedShader {
int u_texmtx;
int u_world;
int u_depthRange; // x,y = viewport xscale/xcenter. z,w=clipping minz/maxz (?)
int u_cullRangeMin;
int u_cullRangeMax;

#ifdef USE_BONE_ARRAY
int u_bone; // array, size is numBones
Expand Down
30 changes: 25 additions & 5 deletions GPU/GLES/VertexShaderGeneratorGLES.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,12 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
*uniformMask |= DIRTY_DEPTHRANGE;
}

if (!isModeThrough) {
WRITE(p, "uniform highp vec4 u_cullRangeMin;\n");
WRITE(p, "uniform highp vec4 u_cullRangeMax;\n");
*uniformMask |= DIRTY_CULLRANGE;
}

WRITE(p, "%s%s lowp vec4 v_color0;\n", shading, varying);
if (lmode) {
WRITE(p, "%s%s lowp vec3 v_color1;\n", shading, varying);
Expand Down Expand Up @@ -472,13 +478,13 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
WRITE(p, " v_fogdepth = position.w;\n");
}
if (isModeThrough) {
WRITE(p, " gl_Position = u_proj_through * vec4(position.xyz, 1.0);\n");
WRITE(p, " vec4 outPos = u_proj_through * vec4(position.xyz, 1.0);\n");
} else {
// The viewport is used in this case, so need to compensate for that.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " gl_Position = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n");
WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * vec4(position.xyz, 1.0));\n");
} else {
WRITE(p, " gl_Position = u_proj * vec4(position.xyz, 1.0);\n");
WRITE(p, " vec4 outPos = u_proj * vec4(position.xyz, 1.0);\n");
}
}
} else {
Expand Down Expand Up @@ -671,9 +677,9 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,

// Final view and projection transforms.
if (gstate_c.Supports(GPU_ROUND_DEPTH_TO_16BIT)) {
WRITE(p, " gl_Position = depthRoundZVP(u_proj * viewPos);\n");
WRITE(p, " vec4 outPos = depthRoundZVP(u_proj * viewPos);\n");
} else {
WRITE(p, " gl_Position = u_proj * viewPos;\n");
WRITE(p, " vec4 outPos = u_proj * viewPos;\n");
}

// TODO: Declare variables for dots for shade mapping if needed.
Expand Down Expand Up @@ -898,5 +904,19 @@ void GenerateVertexShader(const VShaderID &id, char *buffer, uint32_t *attrMask,
if (enableFog)
WRITE(p, " v_fogdepth = (viewPos.z + u_fogcoef.x) * u_fogcoef.y;\n");
}

if (!isModeThrough) {
WRITE(p, " vec3 projPos = outPos.xyz / outPos.w;\n");
// Vertex range culling doesn't happen when depth is clamped, so only do this if in range.
WRITE(p, " if (u_cullRangeMin.w <= 0.0f || (projPos.z >= u_cullRangeMin.z && projPos.z <= u_cullRangeMax.z)) {\n");
const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y || projPos.z < u_cullRangeMin.z";
const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y || projPos.z > u_cullRangeMax.z";
WRITE(p, " if (%s || %s) {\n", outMin, outMax);
WRITE(p, " outPos.w = u_cullRangeMax.w;\n");
WRITE(p, " }\n");
WRITE(p, " }\n");
}
WRITE(p, " gl_Position = outPos;\n");

WRITE(p, "}\n");
}
14 changes: 7 additions & 7 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,13 @@ const CommonCommandTableEntry commonCommandTable[] = {
// Viewport.
{ GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE },
{ GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
{ GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_RASTER_STATE },

// Z clip
{ GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_VIEWPORTSCISSOR_STATE },
Expand Down

0 comments on commit ab3a466

Please sign in to comment.