diff --git a/cmake/compileShaders.cmake b/cmake/compileShaders.cmake index c0929d753..26d190a1c 100644 --- a/cmake/compileShaders.cmake +++ b/cmake/compileShaders.cmake @@ -16,7 +16,6 @@ set(SHADER_SOURCE_DEPENDENCIES ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/precomputed_sky.glsl ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/precomputed_sky_params.h ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/projection.glsl - ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/read_visbuf.glsl ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/sky.h ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/tiny_encryption_algorithm.h ${CMAKE_SOURCE_DIR}/src/refresh/vkpt/shader/tone_mapping_utils.glsl diff --git a/inc/common/bsp.h b/inc/common/bsp.h index 1960ddbcf..1a1ccea62 100644 --- a/inc/common/bsp.h +++ b/inc/common/bsp.h @@ -308,7 +308,7 @@ void BSP_TransformedLightPoint(lightpoint_t *point, vec3_t start, vec3_t end, #endif byte *BSP_ClusterVis(bsp_t *bsp, byte *mask, int cluster, int vis); -mleaf_t *BSP_PointLeaf(mnode_t *node, vec3_t p); +mleaf_t *BSP_PointLeaf(mnode_t *node, const vec3_t p); mmodel_t *BSP_InlineModel(bsp_t *bsp, const char *name); byte* BSP_GetPvs(bsp_t *bsp, int cluster); diff --git a/inc/common/math.h b/inc/common/math.h index 0f78c9247..44a9742fd 100644 --- a/inc/common/math.h +++ b/inc/common/math.h @@ -54,7 +54,7 @@ static inline int BoxOnPlaneSideFast(vec3_t emins, vec3_t emaxs, cplane_t *p) return BoxOnPlaneSide(emins, emaxs, p); } -static inline vec_t PlaneDiffFast(vec3_t v, cplane_t *p) +static inline vec_t PlaneDiffFast(const vec3_t v, cplane_t *p) { // fast axial cases if (p->type < 3) { diff --git a/inc/refresh/models.h b/inc/refresh/models.h index 77f75201a..68d14e1f0 100644 --- a/inc/refresh/models.h +++ b/inc/refresh/models.h @@ -86,7 +86,7 @@ typedef struct float* tangents; byte* colors; byte* blend_indices; // byte4 per vertex - float* blend_weights; // float4 per vertex + byte* blend_weights; // byte4 per vertex char* jointNames; int* jointParents; diff --git 
a/src/common/bsp.c b/src/common/bsp.c index 3ddcf17e2..ea1e01a03 100644 --- a/src/common/bsp.c +++ b/src/common/bsp.c @@ -1922,7 +1922,7 @@ byte *BSP_ClusterVis(bsp_t *bsp, byte *mask, int cluster, int vis) return mask; } -mleaf_t *BSP_PointLeaf(mnode_t *node, vec3_t p) +mleaf_t *BSP_PointLeaf(mnode_t *node, const vec3_t p) { float d; diff --git a/src/refresh/model_iqm.c b/src/refresh/model_iqm.c index 679c71319..436f2c9d6 100644 --- a/src/refresh/model_iqm.c +++ b/src/refresh/model_iqm.c @@ -446,7 +446,7 @@ int MOD_LoadIQM_Base(model_t* model, const void* rawdata, size_t length, const c } } - CHECK(iqmData = (iqm_model_t*)MOD_Malloc(sizeof(iqm_model_t))); + CHECK(iqmData = MOD_Malloc(sizeof(iqm_model_t))); model->iqmData = iqmData; // fill header @@ -459,20 +459,20 @@ int MOD_LoadIQM_Base(model_t* model, const void* rawdata, size_t length, const c if (header->num_meshes) { - CHECK(iqmData->meshes = (iqm_mesh_t*)MOD_Malloc(header->num_meshes * sizeof(iqm_mesh_t))); - CHECK(iqmData->indices = (uint32_t*)MOD_Malloc(header->num_triangles * 3 * sizeof(int))); - CHECK(iqmData->positions = (float*)MOD_Malloc(header->num_vertexes * 3 * sizeof(float))); - CHECK(iqmData->texcoords = (float*)MOD_Malloc(header->num_vertexes * 2 * sizeof(float))); - CHECK(iqmData->normals = (float*)MOD_Malloc(header->num_vertexes * 3 * sizeof(float))); + CHECK(iqmData->meshes = MOD_Malloc(header->num_meshes * sizeof(iqm_mesh_t))); + CHECK(iqmData->indices = MOD_Malloc(header->num_triangles * 3 * sizeof(int))); + CHECK(iqmData->positions = MOD_Malloc(header->num_vertexes * 3 * sizeof(float))); + CHECK(iqmData->texcoords = MOD_Malloc(header->num_vertexes * 2 * sizeof(float))); + CHECK(iqmData->normals = MOD_Malloc(header->num_vertexes * 3 * sizeof(float))); if (vertexArrayFormat[IQM_TANGENT] != -1) { - CHECK(iqmData->tangents = (float*)MOD_Malloc(header->num_vertexes * 4 * sizeof(float))); + CHECK(iqmData->tangents = MOD_Malloc(header->num_vertexes * 4 * sizeof(float))); } if 
(vertexArrayFormat[IQM_COLOR] != -1) { - CHECK(iqmData->colors = (byte*)MOD_Malloc(header->num_vertexes * 4 * sizeof(byte))); + CHECK(iqmData->colors = MOD_Malloc(header->num_vertexes * 4 * sizeof(byte))); } if (vertexArrayFormat[IQM_BLENDINDEXES] != -1) @@ -482,30 +482,30 @@ int MOD_LoadIQM_Base(model_t* model, const void* rawdata, size_t length, const c if (vertexArrayFormat[IQM_BLENDWEIGHTS] != -1) { - CHECK(iqmData->blend_weights = (float*)MOD_Malloc(header->num_vertexes * 4 * sizeof(float))); + CHECK(iqmData->blend_weights = MOD_Malloc(header->num_vertexes * 4 * sizeof(byte))); } } if (header->num_joints) { - CHECK(iqmData->jointNames = (char*)MOD_Malloc(joint_names)); - CHECK(iqmData->jointParents = (int*)MOD_Malloc(header->num_joints * sizeof(int))); - CHECK(iqmData->bindJoints = (float*)MOD_Malloc(header->num_joints * 12 * sizeof(float))); // bind joint matricies - CHECK(iqmData->invBindJoints = (float*)MOD_Malloc(header->num_joints * 12 * sizeof(float))); // inverse bind joint matricies + CHECK(iqmData->jointNames = MOD_Malloc(joint_names)); + CHECK(iqmData->jointParents = MOD_Malloc(header->num_joints * sizeof(int))); + CHECK(iqmData->bindJoints = MOD_Malloc(header->num_joints * 12 * sizeof(float))); // bind joint matricies + CHECK(iqmData->invBindJoints = MOD_Malloc(header->num_joints * 12 * sizeof(float))); // inverse bind joint matricies } if (header->num_poses) { - CHECK(iqmData->poses = (iqm_transform_t*)MOD_Malloc(header->num_poses * header->num_frames * sizeof(iqm_transform_t))); // pose transforms + CHECK(iqmData->poses = MOD_Malloc(header->num_poses * header->num_frames * sizeof(iqm_transform_t))); // pose transforms } if (header->ofs_bounds) { - CHECK(iqmData->bounds = (float*)MOD_Malloc(header->num_frames * 6 * sizeof(float))); // model bounds + CHECK(iqmData->bounds = MOD_Malloc(header->num_frames * 6 * sizeof(float))); // model bounds } else if (header->num_meshes && header->num_frames == 0) { - CHECK(iqmData->bounds = (float*)MOD_Malloc(6 * 
sizeof(float))); // model bounds + CHECK(iqmData->bounds = MOD_Malloc(6 * sizeof(float))); // model bounds } if (header->num_meshes) @@ -575,20 +575,29 @@ int MOD_LoadIQM_Base(model_t* model, const void* rawdata, size_t length, const c n * sizeof(float)); break; case IQM_BLENDWEIGHTS: - if (vertexArrayFormat[IQM_BLENDWEIGHTS] == IQM_FLOAT) + if (vertexArrayFormat[IQM_BLENDWEIGHTS] == IQM_UBYTE) { memcpy(iqmData->blend_weights, (const byte*)header + vertexarray->offset, - n * sizeof(float)); + n * sizeof(byte)); } - else + else if(vertexArrayFormat[IQM_BLENDWEIGHTS] == IQM_FLOAT) { - // convert blend weights from byte to float - for (uint32_t vertex_idx = 0; vertex_idx < 4 * header->num_vertexes; vertex_idx++) + const float* weights = (const float*)((const byte*)header + vertexarray->offset); + + // convert blend weights from float to byte + for (uint32_t weight_idx = 0; weight_idx < 4 * header->num_vertexes; weight_idx++) { - iqmData->blend_weights[vertex_idx] = (float)((const byte*)header + vertexarray->offset)[vertex_idx] / 255.f; + float integer_weight = weights[weight_idx] * 255.f; + clamp(integer_weight, 0.f, 255.f); + iqmData->blend_weights[weight_idx] = (byte)integer_weight; } } + else + { + Com_WPrintf("R_LoadIQM: unsupported format for blend weights (%d)\n", vertexArrayFormat[IQM_BLENDWEIGHTS]); + memset(iqmData->blend_weights, 0, n * sizeof(byte)); + } break; case IQM_COLOR: memcpy(iqmData->colors, @@ -718,7 +727,7 @@ int MOD_LoadIQM_Base(model_t* model, const void* rawdata, size_t length, const c if (header->num_anims) { iqmData->num_animations = header->num_anims; - CHECK(iqmData->animations = (iqm_anim_t*)MOD_Malloc(header->num_anims * sizeof(iqm_anim_t))); + CHECK(iqmData->animations = MOD_Malloc(header->num_anims * sizeof(iqm_anim_t))); const iqmAnim_t* src = (const iqmAnim_t*)((const byte*)header + header->ofs_anims); iqm_anim_t* dst = iqmData->animations; diff --git a/src/refresh/vkpt/bsp_mesh.c b/src/refresh/vkpt/bsp_mesh.c index 
5c20f04be..0e39330c2 100644 --- a/src/refresh/vkpt/bsp_mesh.c +++ b/src/refresh/vkpt/bsp_mesh.c @@ -21,6 +21,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "shader/global_textures.h" #include "material.h" #include "cameras.h" +#include "conversion.h" #include #include @@ -99,7 +100,7 @@ remove_collinear_edges(float* positions, float* tex_coords, mbasis_t* bases, int } // direct port of the encode_normal function from utils.glsl -static uint32_t +uint32_t encode_normal(const vec3_t normal) { float invL1Norm = 1.0f / (fabsf(normal[0]) + fabsf(normal[1]) + fabsf(normal[2])); @@ -131,17 +132,14 @@ static FILE* obj_dump_file = NULL; static int obj_vertex_num = 0; #endif -static int +static uint32_t create_poly( - const bsp_t *bsp, - const mface_t *surf, - uint32_t material_id, - float *positions_out, - float *tex_coord_out, - uint32_t *normals_out, - uint32_t *tangents_out, - uint32_t *material_out, - float *emissive_factors_out) + const bsp_t* bsp, + const mface_t* surf, + uint material_id, + uint32_t primitive_index, + uint32_t max_prim, + VboPrimitive* primitives_out) { static const int max_vertices = 32; float positions [3 * /*max_vertices*/ 32]; @@ -159,10 +157,7 @@ create_poly( sc[1] = 1.0f / (float)image_diffuse->height; } } - - float pos_center[3] = { 0 }; - float tc_center[2]; - + for (int i = 0; i < surf->numsurfedges; i++) { msurfedge_t *src_surfedge = surf->firstsurfedge + i; medge_t *src_edge = src_surfedge->edge; @@ -172,11 +167,7 @@ create_poly( float *t = tex_coords + i * 2; VectorCopy(src_vert->point, p); - - pos_center[0] += src_vert->point[0]; - pos_center[1] += src_vert->point[1]; - pos_center[2] += src_vert->point[2]; - + t[0] = (DotProduct(p, texinfo->axis[0]) + texinfo->offset[0]) * sc[0]; t[1] = (DotProduct(p, texinfo->axis[1]) + texinfo->offset[1]) * sc[1]; @@ -204,13 +195,7 @@ create_poly( fprintf(obj_dump_file, "\n"); } #endif - - float inv_edges = 1.f / (float)surf->numsurfedges; - 
VectorScale(pos_center, inv_edges, pos_center); - tc_center[0] = (DotProduct(pos_center, texinfo->axis[0]) + texinfo->offset[0]) * sc[0]; - tc_center[1] = (DotProduct(pos_center, texinfo->axis[1]) + texinfo->offset[1]) * sc[1]; - if (bsp->basisvectors) { // Check the handedness using the basis of the first vertex @@ -239,95 +224,94 @@ create_poly( remove_collinear_edges(positions, tex_coords, bases, &num_vertices); } -#define CP_V(idx, src) \ - do { \ - if(positions_out) { \ - memcpy(positions_out + (idx) * 3, src, sizeof(float) * 3); \ - } \ - } while(0) - -#define CP_T(idx, src) \ - do { \ - if(tex_coord_out) { \ - memcpy(tex_coord_out + (idx) * 2, src, sizeof(float) * 2); \ - } \ - } while(0) - - int k = 0; + if (num_vertices < 3) + return 0; - // Switch between triangle fan around center or first vertex. - // Can't use the center-based fan if normals/tangents are provided - // because it's not trivial to compute the normal and tangent at the center - // of the polygon. - //int tess_center = 0; - int tess_center = num_vertices > 4 && !is_sky && !bsp->basisvectors; + const uint32_t num_triangles = (uint32_t)num_vertices - 2; - const int num_triangles = tess_center - ? num_vertices - : num_vertices - 2; + if (!primitives_out) + return num_triangles; const float emissive_factor = (texinfo->c.flags & SURF_LIGHT) && texinfo->material->bsp_radiance ? (float)texinfo->radiance * cvar_pt_bsp_radiance_scale->value : 1.f; - bool write_normals = bsp->basisvectors && (normals_out || tangents_out); + float alpha = 1.f; + if (MAT_IsKind(material_id, MATERIAL_KIND_TRANSPARENT)) + alpha = (texinfo->c.flags & SURF_TRANS33) ? 0.33f : (texinfo->c.flags & SURF_TRANS66) ? 
0.66f : 1.0f; + + const uint32_t emissive_and_alpha = floatToHalf(emissive_factor) | (floatToHalf(alpha) << 16); + + bool write_normals = bsp->basisvectors != NULL; - for (int i = 0; i < num_triangles; i++) + for (uint32_t i = 0; i < num_triangles; i++) { - int i1 = (i + 2 - tess_center) % num_vertices; - int i2 = (i + 1 - tess_center) % num_vertices; + // The prititive buffer is allocated based on the expected number of prims generated by the bsp, + // so just verify that here, mostly for debugging. + if (primitive_index + i >= max_prim) + { + assert(!"Primitive buffer overflow - there's a bug somewhere."); + return i; + } - CP_V(k, tess_center ? pos_center : positions); - CP_T(k, tess_center ? tc_center : tex_coords); + memset(primitives_out, 0, sizeof(VboPrimitive)); + + int i1 = (i + 2) % num_vertices; + int i2 = (i + 1) % num_vertices; + + float* pos = positions; + float* tc = tex_coords; + VectorCopy(pos, primitives_out->pos0); + primitives_out->uv0[0] = tc[0]; + primitives_out->uv0[1] = tc[1]; + if (write_normals) { const mbasis_t* basis = bases; const vec3_t* normal = bsp->basisvectors + basis->normal; const vec3_t* tangent = bsp->basisvectors + basis->tangent; - if (normals_out) normals_out[k] = encode_normal(*normal); - if (tangents_out) tangents_out[k] = encode_normal(*tangent); + primitives_out->normals[0] = encode_normal(*normal); + primitives_out->tangents[0] = encode_normal(*tangent); } - k++; - CP_V(k, positions + i1 * 3); - CP_T(k, tex_coords + i1 * 2); + pos = positions + i1 * 3; + tc = tex_coords + i1 * 2; + VectorCopy(pos, primitives_out->pos1); + primitives_out->uv1[0] = tc[0]; + primitives_out->uv1[1] = tc[1]; + if (write_normals) { const mbasis_t* basis = bases + i1; const vec3_t* normal = bsp->basisvectors + basis->normal; const vec3_t* tangent = bsp->basisvectors + basis->tangent; - if (normals_out) normals_out[k] = encode_normal(*normal); - if (tangents_out) tangents_out[k] = encode_normal(*tangent); + primitives_out->normals[1] = 
encode_normal(*normal); + primitives_out->tangents[1] = encode_normal(*tangent); } - k++; - - CP_V(k, positions + i2 * 3); - CP_T(k, tex_coords + i2 * 2); + + pos = positions + i2 * 3; + tc = tex_coords + i2 * 2; + VectorCopy(pos, primitives_out->pos2); + primitives_out->uv2[0] = tc[0]; + primitives_out->uv2[1] = tc[1]; + if (write_normals) { const mbasis_t* basis = bases + i2; const vec3_t* normal = bsp->basisvectors + basis->normal; const vec3_t* tangent = bsp->basisvectors + basis->tangent; - if (normals_out) normals_out[k] = encode_normal(*normal); - if (tangents_out) tangents_out[k] = encode_normal(*tangent); - } - k++; - - if (material_out) { - material_out[i] = material_id; + primitives_out->normals[2] = encode_normal(*normal); + primitives_out->tangents[2] = encode_normal(*tangent); } - if (emissive_factors_out) { - emissive_factors_out[i] = emissive_factor; - } + primitives_out->material_id = material_id; + primitives_out->emissive_and_alpha = emissive_and_alpha; + primitives_out->instance = 0; + + ++primitives_out; } - -#undef CP_V -#undef CP_T -#undef CP_M - - assert(k % 3 == 0); - return k; + + return num_triangles; } static int @@ -596,8 +580,26 @@ static void build_pvs2(bsp_t* bsp) } +// Provides an upper estimate (not counting the collinear edge removal, invisible materials etc.) +// for the total number of triangles needed to represent the bsp and one instance of every model. +static int count_triangles(const bsp_t* bsp) +{ + int num_tris = 0; + + for (int i = 0; i < bsp->numfaces; i++) + { + mface_t* surf = bsp->faces + i; + int num_vertices = surf->numsurfedges; + + if (num_vertices >= 3) + num_tris += (num_vertices - 2); + } + + return num_tris; +} + static void -collect_surfaces(int *idx_ctr, bsp_mesh_t *wm, bsp_t *bsp, int model_idx, int (*filter)(int, int)) +collect_surfaces(uint32_t *prim_ctr, bsp_mesh_t *wm, bsp_t *bsp, int model_idx, int (*filter)(int, int)) { mface_t *surfaces = model_idx < 0 ? 
bsp->faces : bsp->models[model_idx].firstface; int num_faces = model_idx < 0 ? bsp->numfaces : bsp->models[model_idx].numfaces; @@ -652,47 +654,45 @@ collect_surfaces(int *idx_ctr, bsp_mesh_t *wm, bsp_t *bsp, int model_idx, int (* int camera_id = Q_rand() % (wm->num_cameras * 4); material_id = (material_id & ~MATERIAL_LIGHT_STYLE_MASK) | ((camera_id << MATERIAL_LIGHT_STYLE_SHIFT) & MATERIAL_LIGHT_STYLE_MASK); } + + VboPrimitive* surface_prims = wm->primitives + *prim_ctr; + + uint32_t prims_in_surface = create_poly(bsp, surf, material_id, *prim_ctr, wm->num_primitives_allocated, surface_prims); - if (*idx_ctr + create_poly(bsp, surf, material_id, NULL, NULL, NULL, NULL, NULL, NULL) >= MAX_VERT_BSP) { - Com_Error(ERR_FATAL, "error: exceeding max vertex limit\n"); - } - - int cnt = create_poly(bsp, surf, material_id, - &wm->positions[*idx_ctr * 3], - &wm->tex_coords[*idx_ctr * 2], - &wm->normals[*idx_ctr], - &wm->tangents[*idx_ctr], - &wm->materials[*idx_ctr / 3], - &wm->emissive_factors[*idx_ctr / 3]); - - for (int it = *idx_ctr / 3, k = 0; k < cnt; k += 3, ++it) + for (uint32_t k = 0; k < prims_in_surface; ++k) { if (model_idx < 0) { + // Collect the positions into one array for compatibility with get_triangle_off_center(...) + float positions[9]; + VectorCopy(surface_prims[k].pos0, positions + 0); + VectorCopy(surface_prims[k].pos1, positions + 3); + VectorCopy(surface_prims[k].pos2, positions + 6); + // Compute the BSP node for this specific triangle based on its center. // The face lists in the BSP are slightly incorrect, or the original code // in q2vkpt that was extracting them was incorrect. vec3_t center, anti_center; - get_triangle_off_center(wm->positions + it * 9, center, anti_center, 0.01f); + get_triangle_off_center(positions, center, anti_center, 0.01f); int cluster = BSP_PointLeaf(bsp->nodes, center)->cluster; // If the small offset for the off-center point was too small, and that point // is not inside any cluster, try a larger offset. 
if (cluster < 0) { - get_triangle_off_center(wm->positions + it * 9, center, anti_center, 1.f); + get_triangle_off_center(positions, center, anti_center, 1.f); cluster = BSP_PointLeaf(bsp->nodes, center)->cluster; } - - wm->clusters[it] = cluster; + + surface_prims[k].cluster = cluster; if (cluster >= 0 && (MAT_IsKind(material_id, MATERIAL_KIND_SKY) || MAT_IsKind(material_id, MATERIAL_KIND_LAVA))) { bool is_bsp_sky_light = (surf_flags & (SURF_LIGHT | SURF_SKY)) == (SURF_LIGHT | SURF_SKY); if (is_sky_or_lava_cluster(wm, surf, cluster, material_id) || (cvar_pt_bsp_sky_lights->integer && is_bsp_sky_light)) { - wm->materials[it] |= MATERIAL_FLAG_LIGHT; + surface_prims[k].material_id |= MATERIAL_FLAG_LIGHT; } } @@ -717,10 +717,10 @@ collect_surfaces(int *idx_ctr, bsp_mesh_t *wm, bsp_t *bsp, int model_idx, int (* } } else - wm->clusters[it] = -1; + surface_prims[k].cluster = -1; } - *idx_ctr += cnt; + *prim_ctr += prims_in_surface; } if (any_pvs_patches) @@ -1294,13 +1294,13 @@ collect_sky_and_lava_light_polys(bsp_mesh_t *wm, bsp_t* bsp) static bool is_model_transparent(bsp_mesh_t *wm, bsp_model_t *model) { - if (model->idx_count == 0) + if (model->geometry.num_geometries == 0) return false; - for (int i = 0; i < model->idx_count / 3; i++) + for (uint prim_idx = 0; prim_idx < model->geometry.prim_counts[0]; prim_idx++) { - int prim = model->idx_offset / 3 + i; - int material = wm->materials[prim]; + uint prim = model->geometry.prim_offsets[0] + prim_idx; + uint material = wm->primitives[prim].material_id; if (!(MAT_IsKind(material, MATERIAL_KIND_SLIME) || MAT_IsKind(material, MATERIAL_KIND_WATER) || MAT_IsKind(material, MATERIAL_KIND_GLASS) || MAT_IsKind(material, MATERIAL_KIND_TRANSPARENT))) return false; @@ -1312,15 +1312,15 @@ is_model_transparent(bsp_mesh_t *wm, bsp_model_t *model) static bool is_model_masked(bsp_mesh_t *wm, bsp_model_t *model) { - if (model->idx_count == 0) + if (model->geometry.num_geometries == 0) return false; - for (int i = 0; i < 
model->idx_count / 3; i++) + for (uint prim_idx = 0; prim_idx < model->geometry.prim_counts[0]; prim_idx++) { - int prim = model->idx_offset / 3 + i; - int material = wm->materials[prim]; + uint prim = model->geometry.prim_offsets[0] + prim_idx; + uint material = wm->primitives[prim].material_id; - const pbr_material_t* mat = MAT_ForIndex(material & MATERIAL_INDEX_MASK); + const pbr_material_t* mat = MAT_ForIndex((int)(material & MATERIAL_INDEX_MASK)); if (mat && mat->image_mask) return true; @@ -1330,46 +1330,56 @@ is_model_masked(bsp_mesh_t *wm, bsp_model_t *model) } void -compute_aabb(const float* positions, int numvert, float* aabb_min, float* aabb_max) +append_aabb(const VboPrimitive* primitives, uint32_t numprims, float* aabb_min, float* aabb_max) { - VectorSet(aabb_min, FLT_MAX, FLT_MAX, FLT_MAX); - VectorSet(aabb_max, -FLT_MAX, -FLT_MAX, -FLT_MAX); - - for (int i = 0; i < numvert; i++) + for (uint32_t prim_idx = 0; prim_idx < numprims; prim_idx++) { - float const* position = positions + i * 3; + const VboPrimitive* prim = primitives + prim_idx; + + for (uint32_t vert_idx = 0; vert_idx < 3; vert_idx++) + { + const float* position; + switch (vert_idx) + { + case 0: position = prim->pos0; break; + case 1: position = prim->pos1; break; + default: position = prim->pos2; break; + } - aabb_min[0] = min(aabb_min[0], position[0]); - aabb_min[1] = min(aabb_min[1], position[1]); - aabb_min[2] = min(aabb_min[2], position[2]); + aabb_min[0] = min(aabb_min[0], position[0]); + aabb_min[1] = min(aabb_min[1], position[1]); + aabb_min[2] = min(aabb_min[2], position[2]); - aabb_max[0] = max(aabb_max[0], position[0]); - aabb_max[1] = max(aabb_max[1], position[1]); - aabb_max[2] = max(aabb_max[2], position[2]); + aabb_max[0] = max(aabb_max[0], position[0]); + aabb_max[1] = max(aabb_max[1], position[1]); + aabb_max[2] = max(aabb_max[2], position[2]); + } } } void -compute_world_tangents(bsp_t* bsp, bsp_mesh_t* wm) +compute_aabb(const VboPrimitive* primitives, uint32_t numprims, 
float* aabb_min, float* aabb_max) { - // compute tangent space - uint32_t ntriangles = wm->num_indices / 3; + VectorSet(aabb_min, FLT_MAX, FLT_MAX, FLT_MAX); + VectorSet(aabb_max, -FLT_MAX, -FLT_MAX, -FLT_MAX); - wm->texel_density = Z_Malloc(MAX_VERT_BSP * sizeof(float) / 3); + append_aabb(primitives, numprims, aabb_min, aabb_max); +} - for (int idx_tri = 0; idx_tri < ntriangles; ++idx_tri) +void +compute_world_tangents(bsp_t* bsp, bsp_mesh_t* wm) +{ + for (int idx_tri = 0; idx_tri < wm->num_primitives; ++idx_tri) { - uint32_t iA = wm->indices[idx_tri * 3 + 0]; // no vertex indexing - uint32_t iB = wm->indices[idx_tri * 3 + 1]; - uint32_t iC = wm->indices[idx_tri * 3 + 2]; - - float const * pA = wm->positions + (iA * 3); - float const * pB = wm->positions + (iB * 3); - float const * pC = wm->positions + (iC * 3); + VboPrimitive* prim = wm->primitives + idx_tri; + + float const * pA = prim->pos0; + float const * pB = prim->pos1; + float const * pC = prim->pos2; - float const * tA = wm->tex_coords + (iA * 2); - float const * tB = wm->tex_coords + (iB * 2); - float const * tC = wm->tex_coords + (iC * 2); + float const * tA = prim->uv0; + float const * tB = prim->uv1; + float const * tC = prim->uv2; vec3_t dP0, dP1; VectorSubtract(pB, pA, dP0); @@ -1379,6 +1389,7 @@ compute_world_tangents(bsp_t* bsp, bsp_mesh_t* wm) Vector2Subtract(tB, tA, dt0); Vector2Subtract(tC, tA, dt1); + // Compute the tangent basis if it's not provided by the BSPX if (!bsp->basisvectors) { float r = 1.f / (dt0[0] * dt1[1] - dt1[0] * dt0[1]); @@ -1398,9 +1409,9 @@ compute_world_tangents(bsp_t* bsp, bsp_mesh_t* wm) VectorNormalize(normal); uint32_t encoded_normal = encode_normal(normal); - wm->normals[idx_tri * 3 + 0] = encoded_normal; - wm->normals[idx_tri * 3 + 1] = encoded_normal; - wm->normals[idx_tri * 3 + 2] = encoded_normal; + prim->normals[0] = encoded_normal; + prim->normals[1] = encoded_normal; + prim->normals[2] = encoded_normal; vec3_t tangent; @@ -1410,9 +1421,9 @@ 
compute_world_tangents(bsp_t* bsp, bsp_mesh_t* wm) VectorNormalize2(t, tangent); // Graham-Schmidt : t = normalize(t - n * (n.t)) uint32_t encoded_tangent = encode_normal(tangent); - wm->tangents[idx_tri * 3 + 0] = encoded_tangent; - wm->tangents[idx_tri * 3 + 1] = encoded_tangent; - wm->tangents[idx_tri * 3 + 2] = encoded_tangent; + prim->tangents[0] = encoded_tangent; + prim->tangents[1] = encoded_tangent; + prim->tangents[2] = encoded_tangent; vec3_t cross; CrossProduct(normal, t, cross); @@ -1420,19 +1431,20 @@ compute_world_tangents(bsp_t* bsp, bsp_mesh_t* wm) if (dot < 0.0f) { - wm->materials[idx_tri] |= MATERIAL_FLAG_HANDEDNESS; + prim->material_id |= MATERIAL_FLAG_HANDEDNESS; } } + // Compute the texel density in this primitive float texel_density = 0.f; - int material_idx = wm->materials[idx_tri] & MATERIAL_INDEX_MASK; + int material_idx = (int)prim->material_id & MATERIAL_INDEX_MASK; pbr_material_t* mat = MAT_ForIndex(material_idx); if (mat && mat->image_base) { - dt0[0] *= mat->image_base->width; - dt0[1] *= mat->image_base->height; - dt1[0] *= mat->image_base->width; - dt1[1] *= mat->image_base->height; + dt0[0] *= (float)mat->image_base->width; + dt0[1] *= (float)mat->image_base->height; + dt1[0] *= (float)mat->image_base->width; + dt1[1] *= (float)mat->image_base->height; float WL0 = VectorLength(dP0); float WL1 = VectorLength(dP1); @@ -1444,7 +1456,7 @@ compute_world_tangents(bsp_t* bsp, bsp_mesh_t* wm) texel_density = max(L0, L1); } - wm->texel_density[idx_tri] = texel_density; + prim->texel_density = texel_density; } } @@ -1526,35 +1538,40 @@ load_sky_and_lava_clusters(bsp_mesh_t* wm, const char* map_name) Z_Free(filebuf); } +static void +mark_clusters_with_sky(const bsp_mesh_t* wm, const model_geometry_t* geom, uint8_t* clusters_with_sky) +{ + for (uint32_t prim_idx = 0; prim_idx < geom->prim_counts[0]; prim_idx++) + { + uint32_t prim = geom->prim_offsets[0] + prim_idx; + + int cluster = wm->primitives[prim].cluster; + if (cluster < 0) continue; + 
if ((cluster >> 3) < VIS_MAX_BYTES) + clusters_with_sky[cluster >> 3] |= (1 << (cluster & 7)); + } +} + static void compute_sky_visibility(bsp_mesh_t *wm, bsp_t *bsp) { memset(wm->sky_visibility, 0, VIS_MAX_BYTES); - if (wm->world_sky_count == 0 && wm->world_custom_sky_count == 0) + if (wm->geom_sky.num_geometries == 0 && wm->geom_custom_sky.num_geometries == 0) return; - int numclusters = bsp->vis->numclusters; - - char clusters_with_sky[VIS_MAX_BYTES]; + uint32_t numclusters = bsp->vis->numclusters; - memset(clusters_with_sky, 0, VIS_MAX_BYTES); - - for (int i = 0; i < (wm->world_sky_count + wm->world_custom_sky_count) / 3; i++) - { - int prim = wm->world_sky_offset / 3 + i; + uint8_t clusters_with_sky[VIS_MAX_BYTES] = { 0 }; - int cluster = wm->clusters[prim]; - if (cluster < 0) continue; - if ((cluster >> 3) < VIS_MAX_BYTES) - clusters_with_sky[cluster >> 3] |= (1 << (cluster & 7)); - } + mark_clusters_with_sky(wm, &wm->geom_sky, clusters_with_sky); + mark_clusters_with_sky(wm, &wm->geom_custom_sky, clusters_with_sky); - for (int cluster = 0; cluster < numclusters; cluster++) + for (uint32_t cluster = 0; cluster < numclusters; cluster++) { if (clusters_with_sky[cluster >> 3] & (1 << (cluster & 7))) { - byte* mask = BSP_GetPvs(bsp, cluster); + byte* mask = BSP_GetPvs(bsp, (int)cluster); for (int i = 0; i < bsp->visrowsize; i++) wm->sky_visibility[i] |= mask[i]; @@ -1572,18 +1589,26 @@ compute_cluster_aabbs(bsp_mesh_t* wm) VectorSet(wm->cluster_aabbs[c].maxs, -FLT_MAX, -FLT_MAX, -FLT_MAX); } - for (int tri = 0; tri < wm->world_idx_count / 3; tri++) + for (uint prim_idx = 0; prim_idx < wm->geom_opaque.prim_counts[0]; prim_idx++) { - int c = wm->clusters[tri]; + int c = wm->primitives[prim_idx].cluster; if(c < 0 || c >= wm->num_clusters) continue; aabb_t* aabb = wm->cluster_aabbs + c; + + const VboPrimitive* prim = wm->primitives + prim_idx; for (int i = 0; i < 3; i++) { - float const* position = wm->positions + tri * 9 + i * 3; + const float* position; + switch(i) 
+ { + case 0: position = prim->pos0; break; + case 1: position = prim->pos1; break; + default: position = prim->pos2; break; + } aabb->mins[0] = min(aabb->mins[0], position[0]); aabb->mins[1] = min(aabb->mins[1], position[1]); @@ -1597,7 +1622,7 @@ compute_cluster_aabbs(bsp_mesh_t* wm) } static void -get_aabb_corner(aabb_t* aabb, int corner_idx, vec3_t corner) +get_aabb_corner(const aabb_t* aabb, int corner_idx, vec3_t corner) { corner[0] = (corner_idx & 1) ? aabb->maxs[0] : aabb->mins[0]; corner[1] = (corner_idx & 2) ? aabb->maxs[1] : aabb->mins[1]; @@ -1605,7 +1630,7 @@ get_aabb_corner(aabb_t* aabb, int corner_idx, vec3_t corner) } static bool -light_affects_cluster(light_poly_t* light, aabb_t* aabb) +light_affects_cluster(light_poly_t* light, const aabb_t* aabb) { // Empty cluster, nothing is visible if (aabb->mins[0] > aabb->maxs[0]) @@ -1712,8 +1737,10 @@ collect_cluster_lights(bsp_mesh_t *wm, bsp_t *bsp) #undef MAX_LIGHTS_PER_CLUSTER } -static bool -bsp_mesh_load_custom_sky(int *idx_ctr, bsp_mesh_t *wm, bsp_t *bsp, const char* map_name) +static tinyobj_attrib_t custom_sky_attrib; + +static uint32_t +bsp_mesh_load_custom_sky(const char* map_name) { char filename[MAX_QPATH]; Q_snprintf(filename, sizeof(filename), "maps/sky/%s.obj", map_name); @@ -1721,80 +1748,86 @@ bsp_mesh_load_custom_sky(int *idx_ctr, bsp_mesh_t *wm, bsp_t *bsp, const char* m void* file_buffer = NULL; int file_size = FS_LoadFile(filename, &file_buffer); if (!file_buffer) - return false; + return 0; - tinyobj_attrib_t attrib; tinyobj_shape_t* shapes = NULL; size_t num_shapes; tinyobj_material_t* materials = NULL; size_t num_materials; unsigned int flags = TINYOBJ_FLAG_TRIANGULATE; - int ret = tinyobj_parse_obj(&attrib, &shapes, &num_shapes, &materials, + int ret = tinyobj_parse_obj(&custom_sky_attrib, &shapes, &num_shapes, &materials, &num_materials, (const char*)file_buffer, file_size, flags); FS_FreeFile(file_buffer); if (ret != TINYOBJ_SUCCESS) { Com_WPrintf("Couldn't parse sky polygon 
definition file %s.\n", filename); - return false; + return 0; } + tinyobj_shapes_free(shapes, num_shapes); + tinyobj_materials_free(materials, num_materials); + + if (custom_sky_attrib.num_face_num_verts == 0) + tinyobj_attrib_free(&custom_sky_attrib); + + return custom_sky_attrib.num_face_num_verts; +} + +static uint32_t +bsp_mesh_create_custom_sky_prims(uint32_t* prim_ctr, bsp_mesh_t* wm, const bsp_t* bsp) +{ int face_offset = 0; - for (int nprim = 0; nprim < attrib.num_face_num_verts; nprim++) + for (uint32_t nprim = 0; nprim < custom_sky_attrib.num_face_num_verts; nprim++) { - int face_num_verts = attrib.face_num_verts[nprim]; - int i0 = attrib.faces[face_offset + 0].v_idx; - int i1 = attrib.faces[face_offset + 1].v_idx; - int i2 = attrib.faces[face_offset + 2].v_idx; - - vec3_t v0, v1, v2; - VectorCopy(attrib.vertices + i0 * 3, v0); - VectorCopy(attrib.vertices + i1 * 3, v1); - VectorCopy(attrib.vertices + i2 * 3, v2); - - int wm_index = *idx_ctr; - int wm_prim = wm_index / 3; - - VectorCopy(v0, wm->positions + wm_index * 3 + 0); - VectorCopy(v1, wm->positions + wm_index * 3 + 3); - VectorCopy(v2, wm->positions + wm_index * 3 + 6); - - wm->tex_coords[wm_index * 2 + 0] = 0.f; - wm->tex_coords[wm_index * 2 + 1] = 0.f; - wm->tex_coords[wm_index * 2 + 2] = 0.f; - wm->tex_coords[wm_index * 2 + 3] = 0.f; - wm->tex_coords[wm_index * 2 + 4] = 0.f; - wm->tex_coords[wm_index * 2 + 5] = 0.f; + int face_num_verts = custom_sky_attrib.face_num_verts[nprim]; + int i0 = custom_sky_attrib.faces[face_offset + 0].v_idx; + int i1 = custom_sky_attrib.faces[face_offset + 1].v_idx; + int i2 = custom_sky_attrib.faces[face_offset + 2].v_idx; + float positions[9]; + VectorCopy(custom_sky_attrib.vertices + i0 * 3, positions + 0); + VectorCopy(custom_sky_attrib.vertices + i1 * 3, positions + 3); + VectorCopy(custom_sky_attrib.vertices + i2 * 3, positions + 6); + + if (*prim_ctr >= wm->num_primitives_allocated) + { + assert(!"Primitive buffer overflow."); + return nprim; + } + + 
VboPrimitive* prim = wm->primitives + *prim_ctr; + + memset(prim, 0, sizeof(*prim)); + VectorCopy(positions + 0, prim->pos0); + VectorCopy(positions + 3, prim->pos1); + VectorCopy(positions + 6, prim->pos2); + vec3_t center; - get_triangle_off_center(wm->positions + wm_index * 3, center, NULL, 1.f); + get_triangle_off_center(positions, center, NULL, 1.f); int cluster = BSP_PointLeaf(bsp->nodes, center)->cluster; - wm->clusters[wm_prim] = cluster; - wm->materials[wm_prim] = MATERIAL_FLAG_LIGHT | MATERIAL_KIND_SKY; + prim->cluster = cluster; + prim->material_id = MATERIAL_FLAG_LIGHT | MATERIAL_KIND_SKY; light_poly_t* light = append_light_poly(&wm->num_light_polys, &wm->allocated_light_polys, &wm->light_polys); - VectorCopy(v0, light->positions + 0); - VectorCopy(v1, light->positions + 3); - VectorCopy(v2, light->positions + 6); + memcpy(light->positions, positions, sizeof(prim_positions_t)); VectorSet(light->color, -1.f, -1.f, -1.f); // special value for the sky VectorCopy(center, light->off_center); light->material = 0; light->style = 0; light->cluster = cluster; - *idx_ctr += 3; + ++*prim_ctr; face_offset += face_num_verts; } - tinyobj_attrib_free(&attrib); - tinyobj_shapes_free(shapes, num_shapes); - tinyobj_materials_free(materials, num_materials); + tinyobj_attrib_free(&custom_sky_attrib); - return true; + return custom_sky_attrib.num_face_num_verts; } void @@ -1821,23 +1854,22 @@ bsp_mesh_create_from_bsp(bsp_mesh_t *wm, bsp_t *bsp, const char* map_name) { Com_Error(ERR_FATAL, "The BSP model has too many clusters (%d)", wm->num_clusters); } + + wm->num_primitives_allocated = count_triangles(bsp); - wm->num_vertices = 0; - wm->num_indices = 0; - wm->positions = Z_Malloc(MAX_VERT_BSP * 3 * sizeof(*wm->positions)); - wm->tex_coords = Z_Malloc(MAX_VERT_BSP * 2 * sizeof(*wm->tex_coords)); - wm->normals = Z_Malloc(MAX_VERT_BSP * sizeof(uint32_t)); - wm->tangents = Z_Malloc(MAX_VERT_BSP * sizeof(uint32_t)); - wm->materials = Z_Malloc(MAX_VERT_BSP / 3 * 
sizeof(*wm->materials)); - wm->clusters = Z_Malloc(MAX_VERT_BSP / 3 * sizeof(*wm->clusters)); - wm->emissive_factors = Z_Malloc(MAX_VERT_BSP / 3 * sizeof(*wm->emissive_factors)); + uint32_t num_custom_sky_prims = bsp_mesh_load_custom_sky(full_game_map_name); + if (num_custom_sky_prims > 0) + wm->num_primitives_allocated += num_custom_sky_prims; + + wm->primitives = Z_Malloc(wm->num_primitives_allocated * sizeof(VboPrimitive)); + wm->num_primitives = 0; // clear these here because `bsp_mesh_load_custom_sky` creates lights before `collect_light_polys` wm->num_light_polys = 0; wm->allocated_light_polys = 0; wm->light_polys = NULL; - int idx_ctr = 0; + uint32_t prim_ctr = 0; #if DUMP_WORLD_MESH_TO_OBJ { @@ -1848,32 +1880,41 @@ bsp_mesh_create_from_bsp(bsp_mesh_t *wm, bsp_t *bsp, const char* map_name) } #endif - collect_surfaces(&idx_ctr, wm, bsp, -1, filter_static_opaque); - wm->world_idx_count = idx_ctr; + vkpt_init_model_geometry(&wm->geom_opaque, 1); + vkpt_init_model_geometry(&wm->geom_transparent, 1); + vkpt_init_model_geometry(&wm->geom_masked, 1); + vkpt_init_model_geometry(&wm->geom_sky, 1); + vkpt_init_model_geometry(&wm->geom_custom_sky, 1); - wm->world_transparent_offset = idx_ctr; - collect_surfaces(&idx_ctr, wm, bsp, -1, filter_static_transparent); - wm->world_transparent_count = idx_ctr - wm->world_transparent_offset; + uint32_t first_prim = prim_ctr; + collect_surfaces(&prim_ctr, wm, bsp, -1, filter_static_opaque); + vkpt_append_model_geometry(&wm->geom_opaque, prim_ctr - first_prim, first_prim, "bsp"); - wm->world_masked_offset = idx_ctr; - collect_surfaces(&idx_ctr, wm, bsp, -1, filter_static_masked); - wm->world_masked_count = idx_ctr - wm->world_masked_offset; + first_prim = prim_ctr; + collect_surfaces(&prim_ctr, wm, bsp, -1, filter_static_transparent); + vkpt_append_model_geometry(&wm->geom_transparent, prim_ctr - first_prim, first_prim, "bsp"); - wm->world_sky_offset = idx_ctr; - collect_surfaces(&idx_ctr, wm, bsp, -1, filter_static_sky); - 
wm->world_sky_count = idx_ctr - wm->world_sky_offset; + first_prim = prim_ctr; + collect_surfaces(&prim_ctr, wm, bsp, -1, filter_static_masked); + vkpt_append_model_geometry(&wm->geom_masked, prim_ctr - first_prim, first_prim, "bsp"); - wm->world_custom_sky_offset = idx_ctr; - bsp_mesh_load_custom_sky(&idx_ctr, wm, bsp, full_game_map_name); + first_prim = prim_ctr; + collect_surfaces(&prim_ctr, wm, bsp, -1, filter_static_sky); + vkpt_append_model_geometry(&wm->geom_sky, prim_ctr - first_prim, first_prim, "bsp"); + + first_prim = prim_ctr; + if (num_custom_sky_prims > 0) + bsp_mesh_create_custom_sky_prims(&prim_ctr, wm, bsp); if (cvar_pt_bsp_sky_lights->integer > 1) - collect_surfaces(&idx_ctr, wm, bsp, -1, filter_nodraw_sky_lights); - wm->world_custom_sky_count = idx_ctr - wm->world_custom_sky_offset; + collect_surfaces(&prim_ctr, wm, bsp, -1, filter_nodraw_sky_lights); + vkpt_append_model_geometry(&wm->geom_custom_sky, prim_ctr - first_prim, first_prim, "bsp"); for (int k = 0; k < bsp->nummodels; k++) { bsp_model_t* model = wm->models + k; - model->idx_offset = idx_ctr; - collect_surfaces(&idx_ctr, wm, bsp, k, filter_all); - model->idx_count = idx_ctr - model->idx_offset; + first_prim = prim_ctr; + collect_surfaces(&prim_ctr, wm, bsp, k, filter_all); + vkpt_init_model_geometry(&model->geometry, 1); + vkpt_append_model_geometry(&model->geometry, prim_ctr - first_prim, first_prim, "bsp_model"); } #if DUMP_WORLD_MESH_TO_OBJ @@ -1891,30 +1932,23 @@ bsp_mesh_create_from_bsp(bsp_mesh_t *wm, bsp_t *bsp, const char* map_name) } } - wm->num_indices = idx_ctr; - wm->num_vertices = idx_ctr; - - wm->indices = Z_Malloc(idx_ctr * sizeof(int)); - for (int i = 0; i < wm->num_vertices; i++) - wm->indices[i] = i; - + wm->num_primitives = prim_ctr; + compute_world_tangents(bsp, wm); - if (wm->num_vertices >= MAX_VERT_BSP) { - Com_Error(ERR_FATAL, "The BSP model has too many vertices (%d)", wm->num_vertices); - } - for(int i = 0; i < wm->num_models; i++) { bsp_model_t* model = 
wm->models + i; - compute_aabb(wm->positions + model->idx_offset * 3, model->idx_count, model->aabb_min, model->aabb_max); + compute_aabb(wm->primitives + model->geometry.prim_offsets[0], model->geometry.prim_counts[0], model->aabb_min, model->aabb_max); VectorAdd(model->aabb_min, model->aabb_max, model->center); VectorScale(model->center, 0.5f, model->center); } - compute_aabb(wm->positions, wm->world_idx_count, wm->world_aabb.mins, wm->world_aabb.maxs); + compute_aabb(wm->primitives + wm->geom_opaque.prim_offsets[0], wm->geom_opaque.prim_counts[0], wm->world_aabb.mins, wm->world_aabb.maxs); + append_aabb(wm->primitives + wm->geom_transparent.prim_offsets[0], wm->geom_transparent.prim_counts[0], wm->world_aabb.mins, wm->world_aabb.maxs); + append_aabb(wm->primitives + wm->geom_masked.prim_offsets[0], wm->geom_masked.prim_counts[0], wm->world_aabb.mins, wm->world_aabb.maxs); vec3_t margin = { 1.f, 1.f, 1.f }; VectorSubtract(wm->world_aabb.mins, margin, wm->world_aabb.mins); @@ -1949,15 +1983,7 @@ bsp_mesh_destroy(bsp_mesh_t *wm) { Z_Free(wm->models); - Z_Free(wm->positions); - Z_Free(wm->tex_coords); - Z_Free(wm->normals); - Z_Free(wm->tangents); - Z_Free(wm->indices); - Z_Free(wm->clusters); - Z_Free(wm->materials); - Z_Free(wm->texel_density); - Z_Free(wm->emissive_factors); + Z_Free(wm->primitives); Z_Free(wm->light_polys); Z_Free(wm->cluster_lights); diff --git a/src/refresh/vkpt/fog.c b/src/refresh/vkpt/fog.c index 6f353857c..2b0358224 100644 --- a/src/refresh/vkpt/fog.c +++ b/src/refresh/vkpt/fog.c @@ -217,9 +217,9 @@ void vkpt_fog_reset(void) memset(fog_volumes, 0, sizeof(fog_volumes)); } -void vkpt_fog_upload(struct ShaderFogVolume* dst) +void vkpt_fog_upload(ShaderFogVolume* dst) { - memset(dst, 0, sizeof(ShaderFogVolume_t) * MAX_FOG_VOLUMES); + memset(dst, 0, sizeof(ShaderFogVolume) * MAX_FOG_VOLUMES); for (int i = 0; i < MAX_FOG_VOLUMES; i++) { diff --git a/src/refresh/vkpt/fog.h b/src/refresh/vkpt/fog.h index 893b9624e..576481311 100644 --- 
a/src/refresh/vkpt/fog.h +++ b/src/refresh/vkpt/fog.h @@ -20,6 +20,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #define __FOG_H_ #include +#include "shader/global_ubo.h" typedef struct { @@ -35,6 +36,6 @@ struct ShaderFogVolume; void vkpt_fog_init(void); void vkpt_fog_shutdown(void); void vkpt_fog_reset(void); -void vkpt_fog_upload(struct ShaderFogVolume* dst); +void vkpt_fog_upload(ShaderFogVolume* dst); #endif // __FOG_H_ diff --git a/src/refresh/vkpt/main.c b/src/refresh/vkpt/main.c index 298476f86..32d8d630e 100644 --- a/src/refresh/vkpt/main.c +++ b/src/refresh/vkpt/main.c @@ -778,8 +778,6 @@ create_command_pool_and_fences() _VK(vkCreateFence(qvk.device, &fence_info, NULL, qvk.fences_frame_sync + i)); ATTACH_LABEL_VARIABLE(qvk.fences_frame_sync[i], FENCE); } - _VK(vkCreateFence(qvk.device, &fence_info, NULL, &qvk.fence_vertex_sync)); - ATTACH_LABEL_VARIABLE(qvk.fence_vertex_sync, FENCE); return VK_SUCCESS; } @@ -1505,7 +1503,6 @@ destroy_vulkan() for(int i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { vkDestroyFence(qvk.device, qvk.fences_frame_sync[i], NULL); } - vkDestroyFence(qvk.device, qvk.fence_vertex_sync, NULL); vkpt_free_command_buffers(&qvk.cmd_buffers_graphics); vkpt_free_command_buffers(&qvk.cmd_buffers_transfer); @@ -1539,15 +1536,15 @@ destroy_vulkan() typedef struct entity_hash_s { unsigned int mesh : 8; unsigned int model : 9; - unsigned int entity : 15; + unsigned int entity : 14; + unsigned int bsp : 1; } entity_hash_t; static int entity_frame_num = 0; -static int model_entity_ids[2][MAX_ENTITIES]; -static int world_entity_ids[2][MAX_ENTITIES]; +static uint32_t model_entity_ids[2][MAX_MODEL_INSTANCES]; static int model_entity_id_count[2]; -static int world_entity_id_count[2]; static int iqm_matrix_count[2]; +static ModelInstance model_instances_prev[MAX_MODEL_INSTANCES]; #define MAX_MODEL_LIGHTS 16384 static int num_model_lights = 0; @@ -1567,10 +1564,10 @@ static pbr_material_t const * get_mesh_material(const 
entity_t* entity, const ma return mesh->materials[skinnum]; } -static inline uint32_t fill_model_instance(const entity_t* entity, const model_t* model, const maliasmesh_t* mesh, - const float* transform, int model_instance_index, bool is_viewer_weapon, bool is_double_sided, int iqm_matrix_index) +static uint32_t compute_mesh_material_flags(const entity_t* entity, const model_t* model, + const maliasmesh_t* mesh, bool is_viewer_weapon, bool is_double_sided) { - pbr_material_t const * material = get_mesh_material(entity, mesh); + pbr_material_t const* material = get_mesh_material(entity, mesh); if (!material) { @@ -1578,12 +1575,12 @@ static inline uint32_t fill_model_instance(const entity_t* entity, const model_t return 0; } - int material_id = material->flags; + uint32_t material_id = material->flags; - if(MAT_IsKind(material_id, MATERIAL_KIND_INVISIBLE)) + if (MAT_IsKind(material_id, MATERIAL_KIND_INVISIBLE)) return 0; // skip the mesh - if(MAT_IsKind(material_id, MATERIAL_KIND_CHROME)) + if (MAT_IsKind(material_id, MATERIAL_KIND_CHROME)) material_id = MAT_SetKind(material_id, MATERIAL_KIND_CHROME_MODEL); if (model->model_class == MCLASS_EXPLOSION) @@ -1598,7 +1595,7 @@ static inline uint32_t fill_model_instance(const entity_t* entity, const model_t if (is_double_sided) material_id |= MATERIAL_FLAG_DOUBLE_SIDED; - if (!MAT_IsKind(material_id, MATERIAL_KIND_GLASS)) + if (!MAT_IsKind(material_id, MATERIAL_KIND_GLASS)) { if (entity->flags & RF_SHELL_RED) material_id |= MATERIAL_FLAG_SHELL_RED; @@ -1608,26 +1605,42 @@ static inline uint32_t fill_model_instance(const entity_t* entity, const model_t material_id |= MATERIAL_FLAG_SHELL_BLUE; } - ModelInstance* instance = &vkpt_refdef.uniform_instance_buffer.model_instances[model_instance_index]; + if (mesh->handedness) + material_id |= MATERIAL_FLAG_HANDEDNESS; + return material_id; +} + +static void fill_model_instance(ModelInstance* instance, const entity_t* entity, const model_t* model, const maliasmesh_t* mesh, + 
const float* transform, uint32_t material_id, int instance_index, int iqm_matrix_index) +{ + int cluster = -1; + if (bsp_world_model) + cluster = BSP_PointLeaf(bsp_world_model->nodes, entity->origin)->cluster; + int frame = entity->frame; int oldframe = entity->oldframe; if (frame >= model->numframes) frame = 0; if (oldframe >= model->numframes) oldframe = 0; - memcpy(instance->M, transform, sizeof(float) * 16); - instance->idx_offset = mesh->idx_offset; - instance->model_index = model - r_models; - instance->offset_curr = mesh->vertex_offset + frame * mesh->numverts * (sizeof(model_vertex_t) / sizeof(uint32_t)); - instance->offset_prev = mesh->vertex_offset + oldframe * mesh->numverts * (sizeof(model_vertex_t) / sizeof(uint32_t)); - instance->backlerp = entity->backlerp; + memcpy(instance->transform, transform, sizeof(float) * 16); + memcpy(instance->transform_prev, transform, sizeof(float) * 16); instance->material = material_id; + instance->cluster = cluster; + instance->source_buffer_idx = (int)(model - r_models) + VERTEX_BUFFER_FIRST_MODEL; + instance->prim_count = mesh->numtris; + instance->prim_offset_curr_pose_curr_frame = mesh->tri_offset + frame * mesh->numtris; + instance->prim_offset_prev_pose_curr_frame = mesh->tri_offset + oldframe * mesh->numtris; + instance->prim_offset_curr_pose_prev_frame = instance->prim_offset_curr_pose_curr_frame; + instance->prim_offset_prev_pose_prev_frame = instance->prim_offset_prev_pose_curr_frame; + instance->pose_lerp_curr_frame = entity->backlerp; + instance->pose_lerp_prev_frame = instance->pose_lerp_curr_frame; + instance->iqm_matrix_offset_curr_frame = iqm_matrix_index; + instance->iqm_matrix_offset_prev_frame = instance->iqm_matrix_offset_curr_frame; + instance->frame = 0; instance->alpha = (entity->flags & RF_TRANSLUCENT) ? entity->alpha : 1.0f; - instance->is_iqm = (model->iqmData) ? 
1 : 0; - if (instance->is_iqm) - instance->offset_prev = iqm_matrix_index; - - return material_id; + instance->render_buffer_idx = 0; // to be filled later + instance->render_prim_offset = 0; } static void @@ -1695,36 +1708,26 @@ static void instance_model_lights(int num_light_polys, const light_poly_t* light num_model_lights++; } } +static const mat4 g_identity_transform = { + { 1.f, 0.f, 0.f, 0.f }, + { 0.f, 1.f, 0.f, 0.f }, + { 0.f, 0.f, 1.f, 0.f }, + { 0.f, 0.f, 0.f, 1.f } +}; -static void process_bsp_entity(const entity_t* entity, int* bsp_mesh_idx, int* instance_idx, int* num_instanced_vert) +static void process_bsp_entity(const entity_t* entity, int* instance_count) { - QVKInstanceBuffer_t* uniform_instance_buffer = &vkpt_refdef.uniform_instance_buffer; - uint32_t* ubo_bsp_cluster_id = (uint32_t*)uniform_instance_buffer->bsp_cluster_id; - uint32_t* ubo_bsp_prim_offset = (uint32_t*)uniform_instance_buffer->bsp_prim_offset; - uint32_t* ubo_instance_buf_offset = (uint32_t*)uniform_instance_buffer->bsp_instance_buf_offset; - uint32_t* ubo_instance_buf_size = (uint32_t*)uniform_instance_buffer->bsp_instance_buf_size; - - const int current_bsp_mesh_index = *bsp_mesh_idx; - if (current_bsp_mesh_index >= SHADER_MAX_BSP_ENTITIES) - { - assert(!"BSP entity count overflow"); - return; - } + InstanceBuffer* uniform_instance_buffer = &vkpt_refdef.uniform_instance_buffer; - if (*instance_idx >= (SHADER_MAX_ENTITIES + SHADER_MAX_BSP_ENTITIES)) + const int current_instance_idx = *instance_count; + if (current_instance_idx >= MAX_MODEL_INSTANCES) { - assert(!"Total entity count overflow"); + assert(!"Entity count overflow"); return; } - - world_entity_ids[entity_frame_num][current_bsp_mesh_index] = entity->id; - + float transform[16]; create_entity_matrix(transform, (entity_t*)entity, false); - BspMeshInstance* ubo_instance_info = uniform_instance_buffer->bsp_mesh_instances + current_bsp_mesh_index; - memcpy(&ubo_instance_info->M, transform, sizeof(transform)); - 
ubo_instance_info->frame = entity->frame; - memset(ubo_instance_info->padding, 0, sizeof(ubo_instance_info->padding)); bsp_model_t* model = vkpt_refdef.bsp_mesh_world.models + (~entity->model); @@ -1751,42 +1754,53 @@ static void process_bsp_entity(const entity_t* entity, int* bsp_mesh_idx, int* i cluster = BSP_PointLeaf(bsp_world_model->nodes, corner_pt_world)->cluster; - if(cluster >= 0) + if (cluster >= 0) break; } } - ubo_bsp_cluster_id[current_bsp_mesh_index] = cluster; - - ubo_bsp_prim_offset[current_bsp_mesh_index] = model->idx_offset / 3; - - const int mesh_vertex_num = model->idx_count; - ubo_instance_buf_offset[current_bsp_mesh_index] = *num_instanced_vert / 3; - ubo_instance_buf_size[current_bsp_mesh_index] = mesh_vertex_num / 3; - - ((int*)uniform_instance_buffer->model_indices)[*instance_idx] = ~current_bsp_mesh_index; - - *num_instanced_vert += mesh_vertex_num; + entity_hash_t hash; + hash.entity = entity->id; + hash.model = ~entity->model; + hash.mesh = 0; + hash.bsp = 1; + + model_entity_ids[entity_frame_num][current_instance_idx] = *(uint32_t*)&hash; + + ModelInstance* mi = uniform_instance_buffer->model_instances + current_instance_idx; + memcpy(&mi->transform, transform, sizeof(transform)); + memcpy(&mi->transform_prev, transform, sizeof(transform)); + mi->material = 0; + mi->cluster = cluster; + mi->source_buffer_idx = VERTEX_BUFFER_WORLD; + mi->prim_count = model->geometry.prim_counts[0]; + mi->prim_offset_curr_pose_curr_frame = 0; // bsp models are not processed by the instancing shader + mi->prim_offset_prev_pose_curr_frame = 0; + mi->prim_offset_curr_pose_prev_frame = 0; + mi->prim_offset_prev_pose_prev_frame = 0; + mi->pose_lerp_curr_frame = 0.f; + mi->pose_lerp_prev_frame = 0.f; + mi->iqm_matrix_offset_curr_frame = -1; + mi->iqm_matrix_offset_prev_frame = -1; + mi->frame = entity->frame; + mi->alpha = 1.f; + mi->render_buffer_idx = VERTEX_BUFFER_WORLD; + mi->render_prim_offset = model->geometry.prim_offsets[0]; 
instance_model_lights(model->num_light_polys, model->light_polys, transform); - (*bsp_mesh_idx)++; - (*instance_idx)++; -} + if (model->geometry.accel) + { + vkpt_pt_instance_model_blas(&model->geometry, mi->transform, VERTEX_BUFFER_WORLD, current_instance_idx); + } -static inline bool is_transparent_material(uint32_t material) -{ - return MAT_IsKind(material, MATERIAL_KIND_SLIME) - || MAT_IsKind(material, MATERIAL_KIND_WATER) - || MAT_IsKind(material, MATERIAL_KIND_GLASS) - || MAT_IsKind(material, MATERIAL_KIND_TRANSPARENT); -} + if (!model->transparent) + { + vkpt_shadow_map_add_instance(transform, qvk.buf_world.buffer, vkpt_refdef.bsp_mesh_world.vertex_data_offset + + mi->render_prim_offset * sizeof(prim_positions_t), mi->prim_count); + } -static inline bool is_masked_material(uint32_t material) -{ - const pbr_material_t* mat = MAT_ForIndex(material & MATERIAL_INDEX_MASK); - - return mat && mat->image_mask; + (*instance_count)++; } #define MESH_FILTER_TRANSPARENT 1 @@ -1799,27 +1813,23 @@ static void process_regular_entity( const model_t* model, bool is_viewer_weapon, bool is_double_sided, - int* model_instance_idx, - int* instance_idx, - int* num_instanced_vert, + int* instance_count, + int* animated_count, + int* num_instanced_prim, int mesh_filter, bool* contains_transparent, bool* contains_masked, int* iqm_matrix_offset, float* iqm_matrix_data) { - QVKInstanceBuffer_t* uniform_instance_buffer = &vkpt_refdef.uniform_instance_buffer; - uint32_t* ubo_instance_buf_offset = (uint32_t*)uniform_instance_buffer->model_instance_buf_offset; - uint32_t* ubo_instance_buf_size = (uint32_t*)uniform_instance_buffer->model_instance_buf_size; - uint32_t* ubo_model_idx_offset = (uint32_t*)uniform_instance_buffer->model_idx_offset; - uint32_t* ubo_model_cluster_id = (uint32_t*)uniform_instance_buffer->model_cluster_id; + InstanceBuffer* uniform_instance_buffer = &vkpt_refdef.uniform_instance_buffer; float transform[16]; create_entity_matrix(transform, (entity_t*)entity, 
is_viewer_weapon); - int current_model_instance_index = *model_instance_idx; - int current_instance_index = *instance_idx; - int current_num_instanced_vert = *num_instanced_vert; + int current_instance_index = *instance_count; + int current_animated_index = *animated_count; + int current_num_instanced_prim = *num_instanced_prim; if (contains_transparent) *contains_transparent = false; @@ -1840,35 +1850,61 @@ static void process_regular_entity( *iqm_matrix_offset += (int)model->iqmData->num_poses; } + bool use_static_blas = vkpt_model_is_static(model) && (mesh_filter != MESH_FILTER_ALL); + + const model_vbo_t* vbo = vkpt_get_model_vbo(model); + + if (use_static_blas) + { + const model_geometry_t* geom = NULL; + + if (mesh_filter & MESH_FILTER_MASKED) + geom = &vbo->geom_masked; + else if (mesh_filter & MESH_FILTER_TRANSPARENT) + geom = &vbo->geom_transparent; + else + geom = &vbo->geom_opaque; + + if (geom->accel) + { + // ugly typecast + mat4 transform_; + memcpy(transform_, transform, sizeof(mat4)); + + uint32_t model_index = (uint32_t)(model - r_models); + + vkpt_pt_instance_model_blas(geom, transform_, VERTEX_BUFFER_FIRST_MODEL + model_index, current_instance_index); + } + } + for (int i = 0; i < model->nummeshes; i++) { const maliasmesh_t* mesh = model->meshes + i; - if (current_model_instance_index >= SHADER_MAX_ENTITIES) + if (current_instance_index >= MAX_MODEL_INSTANCES) { - assert(!"Model entity count overflow"); + assert(!"Model instance count overflow"); break; } - if (current_instance_index >= (SHADER_MAX_ENTITIES + SHADER_MAX_BSP_ENTITIES)) + if (!use_static_blas && current_animated_index >= MAX_MODEL_INSTANCES) { - assert(!"Total entity count overflow"); + assert(!"Animated model count overflow"); break; } - if (mesh->idx_offset < 0 || mesh->vertex_offset < 0) + if (mesh->tri_offset < 0) { // failed to upload the vertex data - don't instance this mesh continue; } - uint32_t material_id = fill_model_instance(entity, model, mesh, transform, - 
current_model_instance_index,is_viewer_weapon, is_double_sided, iqm_matrix_index); - + uint32_t material_id = compute_mesh_material_flags(entity, model, mesh, is_viewer_weapon, is_double_sided); + if (!material_id) continue; - if (is_masked_material(material_id)) + if (MAT_IsMasked(material_id)) { if (contains_masked) *contains_masked = true; @@ -1876,7 +1912,7 @@ static void process_regular_entity( if (!(mesh_filter & MESH_FILTER_MASKED)) continue; } - else if (is_transparent_material(material_id)) + else if (MAT_IsTransparent(material_id)) { if(contains_transparent) *contains_transparent = true; @@ -1894,24 +1930,38 @@ static void process_regular_entity( hash.entity = entity->id; hash.model = entity->model; hash.mesh = i; + hash.bsp = 0; - model_entity_ids[entity_frame_num][current_model_instance_index] = *(uint32_t*)&hash; + model_entity_ids[entity_frame_num][current_instance_index] = *(uint32_t*)&hash; + + ModelInstance* mi = uniform_instance_buffer->model_instances + current_instance_index; - uint32_t cluster_id = ~0u; - if(bsp_world_model) - cluster_id = BSP_PointLeaf(bsp_world_model->nodes, ((entity_t*)entity)->origin)->cluster; - ubo_model_cluster_id[current_model_instance_index] = cluster_id; + fill_model_instance(mi, entity, model, mesh, transform, material_id, + current_instance_index, iqm_matrix_index); - ubo_model_idx_offset[current_model_instance_index] = mesh->idx_offset; + if (use_static_blas) + { + mi->render_buffer_idx = mi->source_buffer_idx; + mi->render_prim_offset = mi->prim_offset_curr_pose_curr_frame; - ubo_instance_buf_offset[current_model_instance_index] = current_num_instanced_vert / 3; - ubo_instance_buf_size[current_model_instance_index] = mesh->numtris; + if (!MAT_IsTransparent(material_id)) + { + vkpt_shadow_map_add_instance(transform, vbo->buffer.buffer, vbo->vertex_data_offset + + mi->render_prim_offset * sizeof(prim_positions_t), mi->prim_count); + } + } + else + { + 
uniform_instance_buffer->animated_model_indices[current_animated_index] = current_instance_index; - ((int*)uniform_instance_buffer->model_indices)[current_instance_index] = current_model_instance_index; + mi->render_buffer_idx = VERTEX_BUFFER_INSTANCED; + mi->render_prim_offset = current_num_instanced_prim; + + current_animated_index++; + current_num_instanced_prim += mesh->numtris; + } - current_model_instance_index++; current_instance_index++; - current_num_instanced_vert += mesh->numtris * 3; } // add cylinder lights for wall lamps @@ -1928,9 +1978,9 @@ static void process_regular_entity( vkpt_build_cylinder_light(model_lights, &num_model_lights, MAX_MODEL_LIGHTS, bsp_world_model, begin, end, color, 1.5f); } - *model_instance_idx = current_model_instance_index; - *instance_idx = current_instance_index; - *num_instanced_vert = current_num_instanced_vert; + *instance_count = current_instance_index; + *animated_count = current_animated_index; + *num_instanced_prim = current_num_instanced_prim; } #if CL_RTX_SHADERBALLS @@ -1949,16 +1999,8 @@ prepare_entities(EntityUploadInfo* upload_info) { entity_frame_num = !entity_frame_num; - QVKInstanceBuffer_t* instance_buffer = &vkpt_refdef.uniform_instance_buffer; - - memcpy(instance_buffer->bsp_mesh_instances_prev, instance_buffer->bsp_mesh_instances, - sizeof(instance_buffer->bsp_mesh_instances_prev)); - memcpy(instance_buffer->model_instances_prev, instance_buffer->model_instances, - sizeof(instance_buffer->model_instances_prev)); - - memcpy(instance_buffer->bsp_cluster_id_prev, instance_buffer->bsp_cluster_id, sizeof(instance_buffer->bsp_cluster_id)); - memcpy(instance_buffer->model_cluster_id_prev, instance_buffer->model_cluster_id, sizeof(instance_buffer->model_cluster_id)); - + InstanceBuffer* instance_buffer = &vkpt_refdef.uniform_instance_buffer; + static int transparent_model_indices[MAX_ENTITIES]; static int masked_model_indices[MAX_ENTITIES]; static int viewer_model_indices[MAX_ENTITIES]; @@ -1971,8 +2013,7 @@ 
prepare_entities(EntityUploadInfo* upload_info) int explosion_num = 0; int model_instance_idx = 0; - int bsp_mesh_idx = 0; - int num_instanced_vert = 0; /* need to track this here to find lights */ + int num_instanced_prim = 0; /* need to track this here to find lights */ int instance_idx = 0; int iqm_matrix_offset = 0; @@ -1984,13 +2025,7 @@ prepare_entities(EntityUploadInfo* upload_info) if (entity->model & 0x80000000) { - const bsp_model_t* model = vkpt_refdef.bsp_mesh_world.models + (~entity->model); - if (model->masked) - masked_model_indices[masked_model_num++] = i; - else if (model->transparent) - transparent_model_indices[transparent_model_num++] = i; - else - process_bsp_entity(entity, &bsp_mesh_idx, &instance_idx, &num_instanced_vert); /* embedded in bsp */ + process_bsp_entity(entity, &model_instance_idx); /* embedded in bsp */ } else { @@ -2008,7 +2043,7 @@ prepare_entities(EntityUploadInfo* upload_info) { bool contains_transparent = false; bool contains_masked = false; - process_regular_entity(entity, model, false, false, &model_instance_idx, &instance_idx, &num_instanced_vert, + process_regular_entity(entity, model, false, false, &model_instance_idx, &instance_idx, &num_instanced_prim, MESH_FILTER_OPAQUE, &contains_transparent, &contains_masked, &iqm_matrix_offset, qvk.iqm_matrices_shadow); if (contains_transparent) @@ -2028,118 +2063,97 @@ prepare_entities(EntityUploadInfo* upload_info) } } - upload_info->dynamic_vertex_num = num_instanced_vert; - - const uint32_t transparent_model_base_vertex_num = num_instanced_vert; + upload_info->opqaue_prim_count = num_instanced_prim; + upload_info->transparent_prim_offset = num_instanced_prim; + for (int i = 0; i < transparent_model_num; i++) { const entity_t* entity = vkpt_refdef.fd->entities + transparent_model_indices[i]; - if (entity->model & 0x80000000) - { - process_bsp_entity(entity, &bsp_mesh_idx, &instance_idx, &num_instanced_vert); - } - else - { - const model_t* model = MOD_ForHandle(entity->model); 
- process_regular_entity(entity, model, false, false, &model_instance_idx, &instance_idx, &num_instanced_vert, - MESH_FILTER_TRANSPARENT, NULL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); - } + const model_t* model = MOD_ForHandle(entity->model); + process_regular_entity(entity, model, false, false, &model_instance_idx, &instance_idx, &num_instanced_prim, + MESH_FILTER_TRANSPARENT, NULL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); } - upload_info->transparent_model_vertex_offset = transparent_model_base_vertex_num; - upload_info->transparent_model_vertex_num = num_instanced_vert - transparent_model_base_vertex_num; + upload_info->transparent_prim_count = num_instanced_prim - upload_info->transparent_prim_offset; + upload_info->masked_prim_offset = num_instanced_prim; - const uint32_t masked_model_base_vertex_num = num_instanced_vert; for (int i = 0; i < masked_model_num; i++) { const entity_t* entity = vkpt_refdef.fd->entities + masked_model_indices[i]; - - if (entity->model & 0x80000000) - { - process_bsp_entity(entity, &bsp_mesh_idx, &instance_idx, &num_instanced_vert); - } - else - { - const model_t* model = MOD_ForHandle(entity->model); - process_regular_entity(entity, model, false, true, &model_instance_idx, &instance_idx, &num_instanced_vert, - MESH_FILTER_MASKED, NULL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); - } + + const model_t* model = MOD_ForHandle(entity->model); + process_regular_entity(entity, model, false, true, &model_instance_idx, &instance_idx, &num_instanced_prim, + MESH_FILTER_MASKED, NULL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); } - upload_info->masked_model_vertex_offset = masked_model_base_vertex_num; - upload_info->masked_model_vertex_num = num_instanced_vert - masked_model_base_vertex_num; - - const uint32_t viewer_model_base_vertex_num = num_instanced_vert; + upload_info->masked_prim_count = num_instanced_prim - upload_info->masked_prim_offset; + upload_info->viewer_model_prim_offset = 
num_instanced_prim; + if (first_person_model) { for (int i = 0; i < viewer_model_num; i++) { const entity_t* entity = vkpt_refdef.fd->entities + viewer_model_indices[i]; const model_t* model = MOD_ForHandle(entity->model); - process_regular_entity(entity, model, false, true, &model_instance_idx, &instance_idx, &num_instanced_vert, + process_regular_entity(entity, model, false, true, &model_instance_idx, &instance_idx, &num_instanced_prim, MESH_FILTER_ALL, NULL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); } } - upload_info->viewer_model_vertex_offset = viewer_model_base_vertex_num; - upload_info->viewer_model_vertex_num = num_instanced_vert - viewer_model_base_vertex_num; + upload_info->viewer_model_prim_count = num_instanced_prim - upload_info->viewer_model_prim_offset; + upload_info->viewer_weapon_prim_offset = num_instanced_prim; upload_info->weapon_left_handed = false; - - const uint32_t viewer_weapon_base_vertex_num = num_instanced_vert; + for (int i = 0; i < viewer_weapon_num; i++) { const entity_t* entity = vkpt_refdef.fd->entities + viewer_weapon_indices[i]; const model_t* model = MOD_ForHandle(entity->model); - process_regular_entity(entity, model, true, false, &model_instance_idx, &instance_idx, &num_instanced_vert, + process_regular_entity(entity, model, true, false, &model_instance_idx, &instance_idx, &num_instanced_prim, MESH_FILTER_ALL, NULL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); if (entity->flags & RF_LEFTHAND) upload_info->weapon_left_handed = true; } - upload_info->viewer_weapon_vertex_offset = viewer_weapon_base_vertex_num; - upload_info->viewer_weapon_vertex_num = num_instanced_vert - viewer_weapon_base_vertex_num; - - const uint32_t explosion_base_vertex_num = num_instanced_vert; + upload_info->viewer_weapon_prim_count = num_instanced_prim - upload_info->viewer_weapon_prim_offset; + upload_info->explosions_prim_offset = num_instanced_prim; + for (int i = 0; i < explosion_num; i++) { const entity_t* entity = 
vkpt_refdef.fd->entities + explosion_indices[i]; const model_t* model = MOD_ForHandle(entity->model); - process_regular_entity(entity, model, false, false, &model_instance_idx, &instance_idx, &num_instanced_vert, + process_regular_entity(entity, model, false, false, &model_instance_idx, &instance_idx, &num_instanced_prim, MESH_FILTER_ALL, NULL, NULL, &iqm_matrix_offset, qvk.iqm_matrices_shadow); } - upload_info->explosions_vertex_offset = explosion_base_vertex_num; - upload_info->explosions_vertex_num = num_instanced_vert - explosion_base_vertex_num; + upload_info->explosions_prim_count = num_instanced_prim - upload_info->explosions_prim_offset; upload_info->num_instances = instance_idx; - upload_info->num_vertices = num_instanced_vert; - - memset(instance_buffer->world_current_to_prev, ~0u, sizeof(instance_buffer->world_current_to_prev)); - memset(instance_buffer->world_prev_to_current, ~0u, sizeof(instance_buffer->world_prev_to_current)); - memset(instance_buffer->model_current_to_prev, ~0u, sizeof(instance_buffer->model_current_to_prev)); - memset(instance_buffer->model_prev_to_current, ~0u, sizeof(instance_buffer->model_prev_to_current)); - - world_entity_id_count[entity_frame_num] = bsp_mesh_idx; - for(int i = 0; i < world_entity_id_count[entity_frame_num]; i++) { - for(int j = 0; j < world_entity_id_count[!entity_frame_num]; j++) { - if(world_entity_ids[entity_frame_num][i] == world_entity_ids[!entity_frame_num][j]) { - instance_buffer->world_current_to_prev[i] = j; - instance_buffer->world_prev_to_current[j] = i; - } - } - } - + upload_info->num_prims = num_instanced_prim; + + memset(instance_buffer->model_current_to_prev, -1, sizeof(instance_buffer->model_current_to_prev)); + memset(instance_buffer->model_prev_to_current, -1, sizeof(instance_buffer->model_prev_to_current)); + model_entity_id_count[entity_frame_num] = model_instance_idx; for(int i = 0; i < model_entity_id_count[entity_frame_num]; i++) { for(int j = 0; j < 
model_entity_id_count[!entity_frame_num]; j++) { entity_hash_t hash = *(entity_hash_t*)&model_entity_ids[entity_frame_num][i]; - if(model_entity_ids[entity_frame_num][i] == model_entity_ids[!entity_frame_num][j] && hash.entity != 0) { + if(model_entity_ids[entity_frame_num][i] == model_entity_ids[!entity_frame_num][j] && hash.entity != 0u) { instance_buffer->model_current_to_prev[i] = j; instance_buffer->model_prev_to_current[j] = i; + + // Copy the "prev" instance parameters from the previous frame's instance buffer + ModelInstance* mi_curr = instance_buffer->model_instances + i; + ModelInstance* mi_prev = model_instances_prev + j; + + memcpy(mi_curr->transform_prev, mi_prev->transform, sizeof(mi_curr->transform_prev)); + mi_curr->prim_offset_curr_pose_prev_frame = mi_prev->prim_offset_curr_pose_curr_frame; + mi_curr->prim_offset_prev_pose_prev_frame = mi_prev->prim_offset_prev_pose_curr_frame; + mi_curr->pose_lerp_prev_frame = mi_prev->pose_lerp_curr_frame; + mi_curr->iqm_matrix_offset_prev_frame = mi_prev->iqm_matrix_offset_curr_frame; } } } @@ -2156,13 +2170,13 @@ prepare_entities(EntityUploadInfo* upload_info) memcpy(qvk.iqm_matrices_shadow + (iqm_matrix_count[entity_frame_num] * 12), qvk.iqm_matrices_prev, iqm_matrix_count[!entity_frame_num] * 12 * sizeof(float)); - // Patch the previous model instances to point at the offset matrices - for (int i = 0; i < model_entity_id_count[!entity_frame_num]; i++) + // Patch the previous matrix offsets to point at the new locations + for (int i = 0; i < model_entity_id_count[entity_frame_num]; i++) { - ModelInstance* instance = &instance_buffer->model_instances_prev[i]; - if (instance->is_iqm) { + ModelInstance* instance = &instance_buffer->model_instances[i]; + if (instance->iqm_matrix_offset_prev_frame >= 0) { // Offset = current matrix count - instance->offset_prev += iqm_matrix_count[entity_frame_num]; + instance->iqm_matrix_offset_prev_frame += iqm_matrix_count[entity_frame_num]; } } } @@ -2178,6 +2192,9 @@ 
prepare_entities(EntityUploadInfo* upload_info) buffer_unmap(&qvk.buf_iqm_matrices_staging[qvk.current_frame_index]); } + + // Save the current model instances for the next frame + memcpy(model_instances_prev, instance_buffer->model_instances, sizeof(ModelInstance) * model_entity_id_count[entity_frame_num]); } #ifdef VKPT_IMAGE_DUMPS @@ -2503,6 +2520,8 @@ prepare_camera(const vec3_t position, const vec3_t direction, mat4_t data) static void prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, const vec3_t sky_matrix[3], bool render_world) { + const bsp_mesh_t* wm = &vkpt_refdef.bsp_mesh_world; + float P[16]; float V[16]; @@ -2535,8 +2554,8 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c create_view_matrix(V, fd); memcpy(ubo->V, V, sizeof(float) * 16); memcpy(ubo->P, P, sizeof(float) * 16); - inverse(V, ubo->invV); - inverse(P, ubo->invP); + inverse(V, *ubo->invV); + inverse(P, *ubo->invP); if (cvar_pt_projection->integer == 1 && render_world) { @@ -2582,7 +2601,10 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->medium = MEDIUM_NONE; ubo->time = fd->time; - ubo->num_static_primitives = (vkpt_refdef.bsp_mesh_world.world_idx_count + vkpt_refdef.bsp_mesh_world.world_transparent_count + vkpt_refdef.bsp_mesh_world.world_masked_count) / 3; + ubo->num_static_primitives = 0; + if (wm->geom_opaque.prim_counts) ubo->num_static_primitives += wm->geom_opaque.prim_counts[0]; + if (wm->geom_transparent.prim_counts) ubo->num_static_primitives += wm->geom_transparent.prim_counts[0]; + if (wm->geom_masked.prim_counts) ubo->num_static_primitives += wm->geom_masked.prim_counts[0]; ubo->num_static_lights = vkpt_refdef.bsp_mesh_world.num_light_polys; vkpt_fog_upload(ubo->fog_volumes); @@ -2695,18 +2717,17 @@ prepare_ubo(refdef_t *fd, mleaf_t* viewleaf, const reference_mode_t* ref_mode, c ubo->first_person_model = cl_player_model->integer == CL_PLAYER_MODEL_FIRST_PERSON; 
memset(ubo->environment_rotation_matrix, 0, sizeof(ubo->environment_rotation_matrix)); - VectorCopy(sky_matrix[0], ubo->environment_rotation_matrix + 0); - VectorCopy(sky_matrix[1], ubo->environment_rotation_matrix + 4); - VectorCopy(sky_matrix[2], ubo->environment_rotation_matrix + 8); + VectorCopy(sky_matrix[0], ubo->environment_rotation_matrix[0]); + VectorCopy(sky_matrix[1], ubo->environment_rotation_matrix[1]); + VectorCopy(sky_matrix[2], ubo->environment_rotation_matrix[2]); add_dlights(vkpt_refdef.fd->dlights, vkpt_refdef.fd->num_dlights, ubo); - const bsp_mesh_t* wm = &vkpt_refdef.bsp_mesh_world; if (wm->num_cameras > 0) { for (int n = 0; n < wm->num_cameras; n++) { - prepare_camera(wm->cameras[n].pos, wm->cameras[n].dir, ubo->security_camera_data[n]); + prepare_camera(wm->cameras[n].pos, wm->cameras[n].dir, *ubo->security_camera_data[n]); } } else @@ -2793,12 +2814,22 @@ R_RenderFrame_RTX(refdef_t *fd) num_model_lights = 0; EntityUploadInfo upload_info = { 0 }; + vkpt_pt_reset_instances(); + vkpt_shadow_map_reset_instances(); prepare_entities(&upload_info); - if (bsp_world_model) + if (bsp_world_model && render_world) { + vkpt_pt_instance_model_blas(&vkpt_refdef.bsp_mesh_world.geom_opaque, g_identity_transform, VERTEX_BUFFER_WORLD, -1); + vkpt_pt_instance_model_blas(&vkpt_refdef.bsp_mesh_world.geom_transparent, g_identity_transform, VERTEX_BUFFER_WORLD, -1); + vkpt_pt_instance_model_blas(&vkpt_refdef.bsp_mesh_world.geom_masked, g_identity_transform, VERTEX_BUFFER_WORLD, -1); + vkpt_pt_instance_model_blas(&vkpt_refdef.bsp_mesh_world.geom_sky, g_identity_transform, VERTEX_BUFFER_WORLD, -1); + vkpt_pt_instance_model_blas(&vkpt_refdef.bsp_mesh_world.geom_custom_sky, g_identity_transform, VERTEX_BUFFER_WORLD, -1); + vkpt_build_beam_lights(model_lights, &num_model_lights, MAX_MODEL_LIGHTS, bsp_world_model, fd->entities, fd->num_entities, prev_adapted_luminance); } + vkpt_vertex_buffer_ensure_primbuf_size(upload_info.num_prims); + QVKUniformBuffer_t *ubo = 
&vkpt_refdef.uniform_buffer; prepare_ubo(fd, viewleaf, &ref_mode, sky_matrix, render_world); ubo->prev_adapted_luminance = prev_adapted_luminance; @@ -2830,8 +2861,8 @@ R_RenderFrame_RTX(refdef_t *fd) vkpt_god_rays_prepare_ubo( ubo, &vkpt_refdef.bsp_mesh_world.world_aabb, - ubo->P, - ubo->V, + *ubo->P, + *ubo->V, shadowmap_view_proj, shadowmap_depth_scale); @@ -2878,9 +2909,11 @@ R_RenderFrame_RTX(refdef_t *fd) { VkCommandBuffer trace_cmd_buf = vkpt_begin_command_buffer(&qvk.cmd_buffers_graphics); - update_transparency(trace_cmd_buf, ubo->V, fd->particles, fd->num_particles, fd->entities, fd->num_entities); + update_transparency(trace_cmd_buf, *ubo->V, fd->particles, fd->num_particles, fd->entities, fd->num_entities); - _VK(vkpt_uniform_buffer_update(trace_cmd_buf)); + // Copy the UBO contents from the staging buffer. + // Actual contents are uploaded to the staging UBO below, right before executing the command buffer. + vkpt_uniform_buffer_copy_from_staging(trace_cmd_buf); // put a profiler query without a marker for the frame begin/end - because markers do not // work well across different command lists @@ -2898,9 +2931,8 @@ R_RenderFrame_RTX(refdef_t *fd) END_PERF_MARKER(trace_cmd_buf, PROFILER_INSTANCE_GEOMETRY); BEGIN_PERF_MARKER(trace_cmd_buf, PROFILER_BVH_UPDATE); - assert(upload_info.num_vertices % 3 == 0); vkpt_pt_create_all_dynamic(trace_cmd_buf, qvk.current_frame_index, &upload_info); - vkpt_pt_create_toplevel(trace_cmd_buf, qvk.current_frame_index, render_world, upload_info.weapon_left_handed); + vkpt_pt_create_toplevel(trace_cmd_buf, qvk.current_frame_index, &upload_info, upload_info.weapon_left_handed); vkpt_pt_update_descripter_set_bindings(qvk.current_frame_index); END_PERF_MARKER(trace_cmd_buf, PROFILER_BVH_UPDATE); @@ -2908,15 +2940,20 @@ R_RenderFrame_RTX(refdef_t *fd) if (god_rays_enabled) { vkpt_shadow_map_render(trace_cmd_buf, shadowmap_view_proj, - vkpt_refdef.bsp_mesh_world.world_idx_count, - upload_info.dynamic_vertex_num, - 
vkpt_refdef.bsp_mesh_world.world_transparent_offset, - vkpt_refdef.bsp_mesh_world.world_transparent_count); + vkpt_refdef.bsp_mesh_world.geom_opaque.prim_offsets[0] * 3, + vkpt_refdef.bsp_mesh_world.geom_opaque.prim_counts[0] * 3, + 0, + upload_info.opqaue_prim_count * 3, + vkpt_refdef.bsp_mesh_world.geom_transparent.prim_offsets[0] * 3, + vkpt_refdef.bsp_mesh_world.geom_transparent.prim_counts[0] * 3); } END_PERF_MARKER(trace_cmd_buf, PROFILER_SHADOW_MAP); vkpt_pt_trace_primary_rays(trace_cmd_buf); + // The host-side image of the uniform buffer is only ready after the `vkpt_pt_create_toplevel` call above + _VK(vkpt_uniform_buffer_upload_to_staging()); + vkpt_submit_command_buffer( trace_cmd_buf, qvk.queue_graphics, @@ -3658,6 +3695,12 @@ R_Shutdown_RTX(bool total) Cmd_RemoveCommand("drop_balls"); #endif + if (vkpt_refdef.bsp_mesh_world_loaded) + { + vkpt_vertex_buffer_cleanup_bsp_mesh(&vkpt_refdef.bsp_mesh_world); + bsp_mesh_destroy(&vkpt_refdef.bsp_mesh_world); + } + vkpt_fog_shutdown(); vkpt_cameras_shutdown(); MAT_Shutdown(); @@ -3999,6 +4042,7 @@ R_BeginRegistration_RTX(const char *name) Com_AddConfigFile(va("maps/%s.cfg", name), 0); if(vkpt_refdef.bsp_mesh_world_loaded) { + vkpt_vertex_buffer_cleanup_bsp_mesh(&vkpt_refdef.bsp_mesh_world); bsp_mesh_destroy(&vkpt_refdef.bsp_mesh_world); vkpt_refdef.bsp_mesh_world_loaded = 0; } @@ -4019,8 +4063,7 @@ R_BeginRegistration_RTX(const char *name) bsp_mesh_register_textures(bsp); bsp_mesh_create_from_bsp(&vkpt_refdef.bsp_mesh_world, bsp, name); vkpt_light_stats_create(&vkpt_refdef.bsp_mesh_world); - _VK(vkpt_vertex_buffer_upload_bsp_mesh_to_staging(&vkpt_refdef.bsp_mesh_world)); - _VK(vkpt_vertex_buffer_bsp_upload_staging()); + _VK(vkpt_vertex_buffer_upload_bsp_mesh(&vkpt_refdef.bsp_mesh_world)); vkpt_refdef.bsp_mesh_world_loaded = 1; bsp = NULL; world_anim_frame = 0; @@ -4036,15 +4079,6 @@ R_BeginRegistration_RTX(const char *name) vkpt_tone_mapping_request_reset(); vkpt_light_buffer_reset_counts(); - 
vkpt_pt_destroy_static(); - const bsp_mesh_t *m = &vkpt_refdef.bsp_mesh_world; - _VK(vkpt_pt_create_static( - m->world_idx_count, - m->world_transparent_count, - m->world_masked_count, - m->world_sky_count, - m->world_custom_sky_count)); - memset(cluster_debug_mask, 0, sizeof(cluster_debug_mask)); cluster_debug_index = -1; diff --git a/src/refresh/vkpt/material.c b/src/refresh/vkpt/material.c index c33a93540..1b86b2df6 100644 --- a/src/refresh/vkpt/material.c +++ b/src/refresh/vkpt/material.c @@ -1074,8 +1074,17 @@ static void material_command(void) reload_flags |= RELOAD_MAP; } } + if ((reload_flags & RELOAD_MAP) != 0) + { + // Trigger a re-upload and rebuild of the models that use this material. + // Reason to rebuild: some material changes result in meshes being classified as + // transparent or masked, which affects the static model BLAS. + vkpt_vertex_buffer_invalidate_static_model_vbos(vkpt_refdef.fd->feedback.view_material_index); + + // Reload the map and necessary models. CL_PrepRefresh(); + } } static void material_completer(genctx_t* ctx, int argnum) @@ -1135,3 +1144,18 @@ void MAT_SynthesizeEmissive(pbr_material_t * mat) } } } + +bool MAT_IsTransparent(uint32_t material) +{ + return MAT_IsKind(material, MATERIAL_KIND_SLIME) + || MAT_IsKind(material, MATERIAL_KIND_WATER) + || MAT_IsKind(material, MATERIAL_KIND_GLASS) + || MAT_IsKind(material, MATERIAL_KIND_TRANSPARENT); +} + +bool MAT_IsMasked(uint32_t material) +{ + const pbr_material_t* mat = MAT_ForIndex((int)(material & MATERIAL_INDEX_MASK)); + + return mat && mat->image_mask; +} diff --git a/src/refresh/vkpt/material.h b/src/refresh/vkpt/material.h index 86dfc220b..462c529ec 100644 --- a/src/refresh/vkpt/material.h +++ b/src/refresh/vkpt/material.h @@ -99,4 +99,10 @@ bool MAT_IsKind(uint32_t material, uint32_t kind); // synthesize 'emissive' image for a material, if necessary void MAT_SynthesizeEmissive(pbr_material_t * mat); +// test if the material is one of the transparent kinds (glass, water,
...) +bool MAT_IsTransparent(uint32_t material); + +// test if the material has an alpha mask +bool MAT_IsMasked(uint32_t material); + #endif // __MATERIAL_H_ diff --git a/src/refresh/vkpt/models.c b/src/refresh/vkpt/models.c index a2d497e2c..0ec2035c1 100644 --- a/src/refresh/vkpt/models.c +++ b/src/refresh/vkpt/models.c @@ -208,6 +208,106 @@ static void extract_model_lights(model_t* model) } } +static void compute_missing_model_tangents(model_t* model) +{ + for (int mesh_idx = 0; mesh_idx < model->nummeshes; mesh_idx++) + { + maliasmesh_t* mesh = model->meshes + mesh_idx; + + if (mesh->tangents) + continue; + + size_t tangent_size = mesh->numverts * model->numframes * sizeof(vec3_t); + + mesh->tangents = MOD_Malloc(tangent_size); + + memset(mesh->tangents, 0, tangent_size); + + int handedness = 0; + + for (int frame = 0; frame < model->numframes; frame++) + { + int voffset = frame * mesh->numverts; + + for (int tri = 0; tri < mesh->numtris; tri++) + { + int iA = mesh->indices[tri * 3 + 0] + voffset; + int iB = mesh->indices[tri * 3 + 1] + voffset; + int iC = mesh->indices[tri * 3 + 2] + voffset; + + const vec3_t* pA = mesh->positions + iA; + const vec3_t* pB = mesh->positions + iB; + const vec3_t* pC = mesh->positions + iC; + + const vec2_t* tA = mesh->tex_coords + iA; + const vec2_t* tB = mesh->tex_coords + iB; + const vec2_t* tC = mesh->tex_coords + iC; + + vec3_t dP0, dP1; + VectorSubtract(*pB, *pA, dP0); + VectorSubtract(*pC, *pA, dP1); + + vec2_t dt0, dt1; + Vector2Subtract(*tB, *tA, dt0); + Vector2Subtract(*tC, *tA, dt1); + + float inv_r = dt0[0] * dt1[1] - dt1[0] * dt0[1]; + + if (inv_r == 0.f) + continue; + + float r = 1.f / inv_r; + + vec3_t tangent = { + (dt1[1] * dP0[0] - dt0[1] * dP1[0]) * r, + (dt1[1] * dP0[1] - dt0[1] * dP1[1]) * r, + (dt1[1] * dP0[2] - dt0[1] * dP1[2]) * r }; + + VectorNormalize(tangent); + + vec3_t* tangentA = mesh->tangents + iA; + vec3_t* tangentB = mesh->tangents + iB; + vec3_t* tangentC = mesh->tangents + iC; + + 
VectorAdd(*tangentA, tangent, *tangentA); + VectorAdd(*tangentB, tangent, *tangentB); + VectorAdd(*tangentC, tangent, *tangentC); + + if (handedness == 0) + { + vec3_t bitangent = { + (dt0[0] * dP1[0] - dt1[0] * dP0[0]) * r, + (dt0[0] * dP1[1] - dt1[0] * dP0[1]) * r, + (dt0[0] * dP1[2] - dt1[0] * dP0[2]) * r }; + + VectorNormalize(bitangent); + + const vec3_t* normal = mesh->normals + iA; + + vec3_t cross; + CrossProduct(*normal, tangent, cross); + + float dot = DotProduct(cross, bitangent); + + if (dot < 0.f) + handedness = -1; + else if (dot > 0.f) + handedness = 1; + } + } + } + + for (int vtx = 0; vtx < mesh->numverts * model->numframes; vtx++) + { + vec3_t* tangent = mesh->tangents + vtx; + + VectorNormalize(*tangent); + } + + mesh->handedness = (handedness < 0); + } +} + int MOD_LoadMD2_RTX(model_t *model, const void *rawdata, size_t length, const char* mod_name) { dmd2header_t header; @@ -462,6 +562,8 @@ int MOD_LoadMD2_RTX(model_t *model, const void *rawdata, size_t length, const ch dst_mesh->indices[i + 2] = tmp; } + compute_missing_model_tangents(model); + extract_model_lights(model); Hunk_End(&model->hunk); @@ -686,6 +788,8 @@ int MOD_LoadMD3_RTX(model_t *model, const void *rawdata, size_t length, const ch dst_frame++; } + compute_missing_model_tangents(model); + extract_model_lights(model); Hunk_End(&model->hunk); @@ -728,7 +832,7 @@ int MOD_LoadIQM_RTX(model_t* model, const void* rawdata, size_t length, const ch mesh->tex_coords = iqm_mesh->data->texcoords ? (vec2_t*)(iqm_mesh->data->texcoords + iqm_mesh->first_vertex * 2) : NULL; mesh->tangents = iqm_mesh->data->tangents ? (vec3_t*)(iqm_mesh->data->tangents + iqm_mesh->first_vertex * 3) : NULL; mesh->blend_indices = iqm_mesh->data->blend_indices ? (uint32_t*)(iqm_mesh->data->blend_indices + iqm_mesh->first_vertex * 4) : NULL; - mesh->blend_weights = iqm_mesh->data->blend_weights ? 
(vec4_t*)(iqm_mesh->data->blend_weights + iqm_mesh->first_vertex * 4) : NULL; + mesh->blend_weights = iqm_mesh->data->blend_weights ? (uint32_t*)(iqm_mesh->data->blend_weights + iqm_mesh->first_vertex * 4) : NULL; mesh->numindices = (int)(iqm_mesh->num_triangles * 3); mesh->numverts = (int)iqm_mesh->num_vertexes; @@ -756,6 +860,8 @@ int MOD_LoadIQM_RTX(model_t* model, const void* rawdata, size_t length, const ch mesh->numskins = 1; // looks like IQM only supports one skin? } + compute_missing_model_tangents(model); + extract_model_lights(model); Hunk_End(&model->hunk); @@ -766,6 +872,8 @@ int MOD_LoadIQM_RTX(model_t* model, const void* rawdata, size_t length, const ch return ret; } +extern model_vbo_t model_vertex_data[]; + void MOD_Reference_RTX(model_t *model) { int mesh_idx, skin_idx, frame_idx; @@ -792,6 +900,7 @@ void MOD_Reference_RTX(model_t *model) } model->registration_sequence = registration_sequence; + model_vertex_data[model - r_models].registration_sequence = registration_sequence; } // vim: shiftwidth=4 noexpandtab tabstop=4 cindent diff --git a/src/refresh/vkpt/path_tracer.c b/src/refresh/vkpt/path_tracer.c index 2ee966976..8a5eb44aa 100644 --- a/src/refresh/vkpt/path_tracer.c +++ b/src/refresh/vkpt/path_tracer.c @@ -33,8 +33,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #define SIZE_SCRATCH_BUFFER (1 << 25) -#define INSTANCE_MAX_NUM 14 - static uint32_t shaderGroupHandleSize = 0; static uint32_t shaderGroupBaseAlignment = 0; static uint32_t minAccelerationStructureScratchOffsetAlignment = 0; @@ -66,23 +64,9 @@ typedef enum { PIPELINE_COUNT } pipeline_index_t; -static BufferResource_t buf_accel_scratch; + BufferResource_t buf_accel_scratch; static size_t scratch_buf_ptr = 0; static BufferResource_t buf_instances[MAX_FRAMES_IN_FLIGHT]; -static int transparent_primitive_offset = 0; -static int masked_primitive_offset = 0; -static int sky_primitive_offset = 0; -static int custom_sky_primitive_offset = 0; -static int 
transparent_model_primitive_offset = 0; -static int masked_model_primitive_offset = 0; -static int viewer_model_primitive_offset = 0; -static int viewer_weapon_primitive_offset = 0; -static int explosions_primitive_offset = 0; -static accel_struct_t blas_static; -static accel_struct_t blas_transparent; -static accel_struct_t blas_masked; -static accel_struct_t blas_sky; -static accel_struct_t blas_custom_sky; static accel_struct_t blas_dynamic[MAX_FRAMES_IN_FLIGHT]; static accel_struct_t blas_transparent_models[MAX_FRAMES_IN_FLIGHT]; static accel_struct_t blas_masked_models[MAX_FRAMES_IN_FLIGHT]; @@ -121,6 +105,9 @@ typedef struct QvkGeometryInstance_s { VkDeviceAddress acceleration_structure; } QvkGeometryInstance_t; +static uint32_t g_num_instances; +static QvkGeometryInstance_t g_instances[MAX_TLAS_INSTANCES]; + typedef struct { int gpu_index; int bounce; @@ -170,7 +157,7 @@ vkpt_pt_init() VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); for(int i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) { - buffer_create(buf_instances + i, INSTANCE_MAX_NUM * sizeof(QvkGeometryInstance_t), + buffer_create(buf_instances + i, MAX_TLAS_INSTANCES * sizeof(QvkGeometryInstance_t), VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); } @@ -362,15 +349,6 @@ static void destroy_accel_struct(accel_struct_t* blas) blas->match.instance_count = 0; } -void vkpt_pt_destroy_static() -{ - destroy_accel_struct(&blas_static); - destroy_accel_struct(&blas_transparent); - destroy_accel_struct(&blas_masked); - destroy_accel_struct(&blas_sky); - destroy_accel_struct(&blas_custom_sky); -} - static void vkpt_pt_destroy_dynamic(int idx) { destroy_accel_struct(&blas_dynamic[idx]); @@ -412,15 +390,15 @@ static inline int accel_matches_top_level(accel_match_info_t *match, #define DYNAMIC_GEOMETRY_BLOAT_FACTOR 2 -static VkResult +static void 
vkpt_pt_create_accel_bottom( VkCommandBuffer cmd_buf, BufferResource_t* buffer_vertex, VkDeviceAddress offset_vertex, BufferResource_t* buffer_index, VkDeviceAddress offset_index, - int num_vertices, - int num_indices, + uint32_t num_vertices, + uint32_t num_indices, accel_struct_t* blas, bool is_dynamic, bool fast_build) @@ -430,7 +408,7 @@ vkpt_pt_create_accel_bottom( if (num_vertices == 0) { blas->present = false; - return VK_SUCCESS; + return; } assert(buffer_vertex->address); @@ -441,7 +419,7 @@ vkpt_pt_create_accel_bottom( .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, .vertexData = {.deviceAddress = buffer_vertex->address + offset_vertex }, .vertexStride = sizeof(float) * 3, - .maxVertex = num_vertices - 1, + .maxVertex = max(num_vertices, 1) - 1, .indexData = {.deviceAddress = buffer_index ? (buffer_index->address + offset_index) : 0 }, .indexType = buffer_index ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_NONE_KHR, }; @@ -493,8 +471,8 @@ vkpt_pt_create_accel_bottom( if (doAlloc) { - int num_vertices_to_allocate = num_vertices; - int num_indices_to_allocate = num_indices; + uint32_t num_vertices_to_allocate = num_vertices; + uint32_t num_indices_to_allocate = num_indices; // Allocate more memory / larger BLAS for dynamic objects if (is_dynamic) @@ -546,11 +524,9 @@ vkpt_pt_create_accel_bottom( qvkCmdBuildAccelerationStructuresKHR(cmd_buf, 1, &buildInfo, &offsets); blas->present = true; - - return VK_SUCCESS; } -static VkResult +static void vkpt_pt_create_accel_bottom_aabb( VkCommandBuffer cmd_buf, BufferResource_t* buffer_aabb, @@ -565,7 +541,7 @@ vkpt_pt_create_accel_bottom_aabb( if (num_aabbs == 0) { blas->present = false; - return VK_SUCCESS; + return; } assert(buffer_aabb->address); @@ -674,62 +650,6 @@ vkpt_pt_create_accel_bottom_aabb( qvkCmdBuildAccelerationStructuresKHR(cmd_buf, 1, &buildInfo, &offsets); blas->present = true; - - return VK_SUCCESS; -} - -VkResult -vkpt_pt_create_static( - int num_vertices, - int num_vertices_transparent, - int 
num_vertices_masked, - int num_vertices_sky, - int num_vertices_custom_sky) -{ - VkCommandBuffer cmd_buf = vkpt_begin_command_buffer(&qvk.cmd_buffers_graphics); - VkDeviceAddress address_vertex = offsetof(BspVertexBuffer, positions_bsp); - - scratch_buf_ptr = 0; - - VkResult ret = vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_bsp, address_vertex, NULL, 0, num_vertices, 0, &blas_static, false, false); - - MEM_BARRIER_BUILD_ACCEL(cmd_buf); - address_vertex += num_vertices * sizeof(float) * 3; - scratch_buf_ptr = 0; - - ret = vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_bsp, address_vertex, NULL, 0, num_vertices_transparent, 0, &blas_transparent, false, false); - - MEM_BARRIER_BUILD_ACCEL(cmd_buf); - address_vertex += num_vertices_transparent * sizeof(float) * 3; - scratch_buf_ptr = 0; - - ret = vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_bsp, address_vertex, NULL, 0, num_vertices_masked, 0, &blas_masked, false, false); - - MEM_BARRIER_BUILD_ACCEL(cmd_buf); - address_vertex += num_vertices_masked * sizeof(float) * 3; - scratch_buf_ptr = 0; - - ret = vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_bsp, address_vertex, NULL, 0, num_vertices_sky, 0, &blas_sky, false, false); - - MEM_BARRIER_BUILD_ACCEL(cmd_buf); - address_vertex += num_vertices_sky * sizeof(float) * 3; - scratch_buf_ptr = 0; - - ret = vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_bsp, address_vertex, NULL, 0, num_vertices_custom_sky, 0, &blas_custom_sky, false, false); - - MEM_BARRIER_BUILD_ACCEL(cmd_buf); - address_vertex += num_vertices_custom_sky * sizeof(float) * 3; - scratch_buf_ptr = 0; - - transparent_primitive_offset = num_vertices / 3; - masked_primitive_offset = transparent_primitive_offset + num_vertices_transparent / 3; - sky_primitive_offset = masked_primitive_offset + num_vertices_masked / 3; - custom_sky_primitive_offset = sky_primitive_offset + num_vertices_sky / 3; - - vkpt_submit_command_buffer_simple(cmd_buf, qvk.queue_graphics, true); - 
vkpt_wait_idle(qvk.queue_graphics, &qvk.cmd_buffers_graphics); - - return ret; } VkResult @@ -740,30 +660,31 @@ vkpt_pt_create_all_dynamic( { scratch_buf_ptr = 0; - uint64_t offset_vertex_base = offsetof(ModelDynamicVertexBuffer, positions_instanced); + uint64_t offset_vertex_base = 0; uint64_t offset_vertex = offset_vertex_base; uint64_t offset_index = 0; - vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->dynamic_vertex_num, 0, blas_dynamic + idx, true, true); + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_positions_instanced, offset_vertex, NULL, offset_index, + upload_info->opqaue_prim_count * 3, 0, blas_dynamic + idx, true, true); - transparent_model_primitive_offset = upload_info->transparent_model_vertex_offset / 3; - offset_vertex = offset_vertex_base + upload_info->transparent_model_vertex_offset * sizeof(float) * 3; - vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->transparent_model_vertex_num, 0, blas_transparent_models + idx, true, true); + offset_vertex = offset_vertex_base + upload_info->transparent_prim_offset * sizeof(prim_positions_t); + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_positions_instanced, offset_vertex, NULL, offset_index, + upload_info->transparent_prim_count * 3, 0, blas_transparent_models + idx, true, true); - masked_model_primitive_offset = upload_info->masked_model_vertex_offset / 3; - offset_vertex = offset_vertex_base + upload_info->masked_model_vertex_offset * sizeof(float) * 3; - vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->masked_model_vertex_num, 0, blas_masked_models + idx, true, true); + offset_vertex = offset_vertex_base + upload_info->masked_prim_offset * sizeof(prim_positions_t); + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_positions_instanced, offset_vertex, NULL, offset_index, + upload_info->masked_prim_count * 
3, 0, blas_masked_models + idx, true, true); - viewer_model_primitive_offset = upload_info->viewer_model_vertex_offset / 3; - offset_vertex = offset_vertex_base + upload_info->viewer_model_vertex_offset * sizeof(float) * 3; - vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->viewer_model_vertex_num, 0, blas_viewer_models + idx, true, true); + offset_vertex = offset_vertex_base + upload_info->viewer_model_prim_offset * sizeof(prim_positions_t); + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_positions_instanced, offset_vertex, NULL, offset_index, + upload_info->viewer_model_prim_count * 3, 0, blas_viewer_models + idx, true, true); - viewer_weapon_primitive_offset = upload_info->viewer_weapon_vertex_offset / 3; - offset_vertex = offset_vertex_base + upload_info->viewer_weapon_vertex_offset * sizeof(float) * 3; - vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->viewer_weapon_vertex_num, 0, blas_viewer_weapon + idx, true, true); + offset_vertex = offset_vertex_base + upload_info->viewer_weapon_prim_offset * sizeof(prim_positions_t); + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_positions_instanced, offset_vertex, NULL, offset_index, + upload_info->viewer_weapon_prim_count * 3, 0, blas_viewer_weapon + idx, true, true); - explosions_primitive_offset = upload_info->explosions_vertex_offset / 3; - offset_vertex = offset_vertex_base + upload_info->explosions_vertex_offset * sizeof(float) * 3; - vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_vertex_model_dynamic, offset_vertex, NULL, offset_index, upload_info->explosions_vertex_num, 0, blas_explosions + idx, true, true); + offset_vertex = offset_vertex_base + upload_info->explosions_prim_offset * sizeof(prim_positions_t); + vkpt_pt_create_accel_bottom(cmd_buf, &qvk.buf_positions_instanced, offset_vertex, NULL, offset_index, + upload_info->explosions_prim_count * 3, 0, blas_explosions + idx, 
true, true); BufferResource_t* buffer_vertex = NULL; BufferResource_t* buffer_index = NULL; @@ -788,7 +709,7 @@ vkpt_pt_create_all_dynamic( } static void -append_blas(QvkGeometryInstance_t *instances, uint32_t *num_instances, accel_struct_t* blas, int instance_id, int mask, int flags, int sbt_offset) +append_blas(QvkGeometryInstance_t *instances, uint32_t *num_instances, accel_struct_t* blas, int vbo_index, uint prim_offset, int mask, int flags, int sbt_offset) { if (!blas->present) return; @@ -799,7 +720,7 @@ append_blas(QvkGeometryInstance_t *instances, uint32_t *num_instances, accel_str 0.0f, 1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f, 0.0f, }, - .instance_id = instance_id, + .instance_id = vbo_index, .mask = mask, .instance_offset = sbt_offset, .flags = flags, @@ -813,11 +734,43 @@ append_blas(QvkGeometryInstance_t *instances, uint32_t *num_instances, accel_str instance.acceleration_structure = qvkGetAccelerationStructureDeviceAddressKHR(qvk.device, &as_device_address_info); - assert(*num_instances < INSTANCE_MAX_NUM); + assert(*num_instances < MAX_TLAS_INSTANCES); memcpy(instances + *num_instances, &instance, sizeof(instance)); + vkpt_refdef.uniform_instance_buffer.tlas_instance_prim_offsets[*num_instances] = prim_offset; + vkpt_refdef.uniform_instance_buffer.tlas_instance_model_indices[*num_instances] = -1; ++*num_instances; } +void vkpt_pt_reset_instances() +{ + g_num_instances = 0; +} + +void vkpt_pt_instance_model_blas(const model_geometry_t* geom, const mat4 transform, uint32_t buffer_idx, int model_instance_index) +{ + if (!geom->accel) + return; + + QvkGeometryInstance_t gpu_instance = { + .transform = { // transpose the matrix + transform[0][0], transform[1][0], transform[2][0], transform[3][0], + transform[0][1], transform[1][1], transform[2][1], transform[3][1], + transform[0][2], transform[1][2], transform[2][2], transform[3][2] + }, + .instance_id = buffer_idx, + .mask = geom->instance_mask, + .instance_offset = geom->sbt_offset, + .flags = 
geom->instance_flags, + .acceleration_structure = geom->blas_device_address, + }; + + assert(g_num_instances < MAX_TLAS_INSTANCES); + memcpy(g_instances + g_num_instances, &gpu_instance, sizeof(gpu_instance)); + vkpt_refdef.uniform_instance_buffer.tlas_instance_prim_offsets[g_num_instances] = geom->prim_offsets[0]; + vkpt_refdef.uniform_instance_buffer.tlas_instance_model_indices[g_num_instances] = model_instance_index; + ++g_num_instances; +} + static void build_tlas(VkCommandBuffer cmd_buf, accel_struct_t* as, VkDeviceAddress instance_data, uint32_t num_instances) { @@ -900,53 +853,55 @@ build_tlas(VkCommandBuffer cmd_buf, accel_struct_t* as, VkDeviceAddress instance } VkResult -vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, bool include_world, bool weapon_left_handed) +vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, const EntityUploadInfo* upload_info, bool weapon_left_handed) { - QvkGeometryInstance_t instances[INSTANCE_MAX_NUM]; - uint32_t num_instances = 0; + append_blas(g_instances, &g_num_instances, &blas_dynamic[idx], VERTEX_BUFFER_INSTANCED, 0, + AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); - if (include_world) - { - append_blas(instances, &num_instances, &blas_static, 0, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); - append_blas(instances, &num_instances, &blas_transparent, transparent_primitive_offset, AS_FLAG_TRANSPARENT, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); - append_blas(instances, &num_instances, &blas_masked, masked_primitive_offset, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_MASKED); - append_blas(instances, &num_instances, &blas_sky, AS_INSTANCE_FLAG_SKY | sky_primitive_offset, AS_FLAG_SKY, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); - append_blas(instances, &num_instances, &blas_custom_sky, AS_INSTANCE_FLAG_SKY | custom_sky_primitive_offset, 
AS_FLAG_CUSTOM_SKY, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); - } + append_blas(g_instances, &g_num_instances, &blas_transparent_models[idx], VERTEX_BUFFER_INSTANCED, upload_info->transparent_prim_offset, + AS_FLAG_TRANSPARENT, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); - append_blas(instances, &num_instances, &blas_dynamic[idx], AS_INSTANCE_FLAG_DYNAMIC, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); - append_blas(instances, &num_instances, &blas_transparent_models[idx], AS_INSTANCE_FLAG_DYNAMIC | transparent_model_primitive_offset, AS_FLAG_TRANSPARENT, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR, SBTO_OPAQUE); - append_blas(instances, &num_instances, &blas_masked_models[idx], AS_INSTANCE_FLAG_DYNAMIC | masked_model_primitive_offset, AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_MASKED); - append_blas(instances, &num_instances, &blas_viewer_weapon[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_weapon_primitive_offset, AS_FLAG_VIEWER_WEAPON, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | (weapon_left_handed ? VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR : 0), SBTO_OPAQUE); + append_blas(g_instances, &g_num_instances, &blas_masked_models[idx], VERTEX_BUFFER_INSTANCED, upload_info->masked_prim_offset, + AS_FLAG_OPAQUE, VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_MASKED); + + append_blas(g_instances, &g_num_instances, &blas_viewer_weapon[idx], VERTEX_BUFFER_INSTANCED, upload_info->viewer_weapon_prim_offset, + AS_FLAG_VIEWER_WEAPON, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | (weapon_left_handed ? 
VK_GEOMETRY_INSTANCE_TRIANGLE_FRONT_COUNTERCLOCKWISE_BIT_KHR : 0), SBTO_OPAQUE); if (cl_player_model->integer == CL_PLAYER_MODEL_FIRST_PERSON) { - append_blas(instances, &num_instances, &blas_viewer_models[idx], AS_INSTANCE_FLAG_DYNAMIC | viewer_model_primitive_offset, AS_FLAG_VIEWER_MODELS, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_OPAQUE); + append_blas(g_instances, &g_num_instances, &blas_viewer_models[idx], VERTEX_BUFFER_INSTANCED, upload_info->viewer_model_prim_offset, + AS_FLAG_VIEWER_MODELS, VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_OPAQUE); } - uint32_t num_instances_geometry = num_instances; + uint32_t num_instances_geometry = g_num_instances; - append_blas(instances, &num_instances, &blas_explosions[idx], AS_INSTANCE_FLAG_DYNAMIC | explosions_primitive_offset, AS_FLAG_EFFECTS, 0, SBTO_EXPLOSION); + // Note: explosions use a different primitive addressing scheme from the other geometry. + // See the comment in pt_logic_explosion(...) in path_tracer_hit_shaders.h for more info. 
+ append_blas(g_instances, &g_num_instances, &blas_explosions[idx], (int)upload_info->explosions_prim_offset, 0, + AS_FLAG_EFFECTS, 0, SBTO_EXPLOSION); if (cvar_pt_enable_particles->integer != 0) { - append_blas(instances, &num_instances, &blas_particles[idx], 0, AS_FLAG_EFFECTS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_PARTICLE); + append_blas(g_instances, &g_num_instances, &blas_particles[idx], 0, 0, + AS_FLAG_EFFECTS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_PARTICLE); } if (cvar_pt_enable_beams->integer != 0) { - append_blas(instances, &num_instances, &blas_beams[idx], 0, AS_FLAG_EFFECTS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_BEAM); + append_blas(g_instances, &g_num_instances, &blas_beams[idx], 0, 0, + AS_FLAG_EFFECTS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_BEAM); } if (cvar_pt_enable_sprites->integer != 0) { - append_blas(instances, &num_instances, &blas_sprites[idx], 0, AS_FLAG_EFFECTS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_SPRITE); + append_blas(g_instances, &g_num_instances, &blas_sprites[idx], 0, 0, + AS_FLAG_EFFECTS, VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR, SBTO_SPRITE); } - uint32_t num_instances_effects = num_instances - num_instances_geometry; + uint32_t num_instances_effects = g_num_instances - num_instances_geometry; void *instance_data = buffer_map(buf_instances + idx); - memcpy(instance_data, &instances, sizeof(QvkGeometryInstance_t) * num_instances); + memcpy(instance_data, &g_instances, sizeof(QvkGeometryInstance_t) * g_num_instances); buffer_unmap(buf_instances + idx); instance_data = NULL; @@ -1075,7 +1030,8 @@ vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf) END_PERF_MARKER(cmd_buf, PROFILER_PRIMARY_RAYS); - BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_VISBUF_A + frame_idx]); + BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_VISBUF_PRIM_A + frame_idx]); + BARRIER_COMPUTE(cmd_buf, 
qvk.images[VKPT_IMG_PT_VISBUF_BARY_A + frame_idx]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_TRANSPARENT]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_MOTION]); BARRIER_COMPUTE(cmd_buf, qvk.images[VKPT_IMG_PT_SHADING_POSITION]); @@ -1215,8 +1171,7 @@ vkpt_pt_destroy() buffer_destroy(buf_instances + i); vkpt_pt_destroy_dynamic(i); } - - vkpt_pt_destroy_static(); + buffer_destroy(&buf_accel_scratch); vkDestroyDescriptorSetLayout(qvk.device, rt_descriptor_set_layout, NULL); vkDestroyPipelineLayout(qvk.device, rt_pipeline_layout, NULL); diff --git a/src/refresh/vkpt/shader/animate_materials.comp b/src/refresh/vkpt/shader/animate_materials.comp index 998d01707..46ea310d3 100644 --- a/src/refresh/vkpt/shader/animate_materials.comp +++ b/src/refresh/vkpt/shader/animate_materials.comp @@ -22,6 +22,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #version 460 #extension GL_GOOGLE_include_directive : enable +#extension GL_EXT_nonuniform_qualifier : enable #include "utils.glsl" @@ -41,7 +42,7 @@ main() if(prim >= global_ubo.num_static_primitives) return; - uint material = get_materials_bsp(prim); + uint material = primitive_buffers[VERTEX_BUFFER_WORLD].primitives[prim].material_id; uint new_material = material; MaterialInfo minfo = get_material_info(material); @@ -59,7 +60,7 @@ main() if(new_material != material) { - set_materials_bsp(prim, new_material); + primitive_buffers[VERTEX_BUFFER_WORLD].primitives[prim].material_id = new_material; } } diff --git a/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp b/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp index 6494816e8..a656193a9 100644 --- a/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp +++ b/src/refresh/vkpt/shader/asvgf_gradient_reproject.comp @@ -45,7 +45,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "vertex_buffer.h" #include "asvgf.glsl" -#include "read_visbuf.glsl" // optimal group size determined experimentally on a 3090 
#define GROUP_SIZE_GRAD 8 @@ -61,49 +60,43 @@ layout(local_size_x = GROUP_SIZE_PIXELS, local_size_y = GROUP_SIZE_PIXELS, local void patch_position(ivec2 ipos, ivec2 found_pos_prev) { // Read the visibility buffer - uvec2 vis_buf = texelFetch(TEX_PT_VISBUF_B, found_pos_prev, 0).xy; + uvec2 vis_buf = texelFetch(TEX_PT_VISBUF_PRIM_B, found_pos_prev, 0).xy; - Triangle triangle; + int static_instance_idx = -1; + uint buffer_idx; + uint primitive_id; - if (visbuf_is_static_prim(vis_buf.x)) + if (vis_buf.x == ~0u) // is a static primitive { - uint primitive_id = visbuf_get_static_prim(vis_buf.x); - triangle = get_bsp_triangle(primitive_id); + primitive_id = vis_buf.y; + buffer_idx = VERTEX_BUFFER_WORLD; } else { - uint instance_id_prev = visbuf_get_instance_id(vis_buf.x); - uint triangle_idx = visbuf_get_instance_prim(vis_buf.x); - uint primitive_id; + uint instance_id_prev = vis_buf.x; + uint triangle_idx = vis_buf.y; // Map the dynamic objects geometry data from the previous frame into the current frame - if (visbuf_is_world_instance(vis_buf.x)) - { - uint instance_id_curr = instance_buffer.world_prev_to_current[instance_id_prev]; - - // the object no longer exists - if (instance_id_curr == ~0u) - return; + uint instance_id_curr = instance_buffer.model_prev_to_current[instance_id_prev]; - uint buf_offset = instance_buffer.bsp_instance_buf_offset[instance_id_curr]; - primitive_id = buf_offset + triangle_idx; - } - else - { - uint instance_id_curr = instance_buffer.model_prev_to_current[instance_id_prev]; + // the object no longer exists + if(instance_id_curr == ~0u) + return; - // the object no longer exists - if(instance_id_curr == ~0u) - return; + ModelInstance mi = instance_buffer.model_instances[instance_id_curr]; + buffer_idx = mi.render_buffer_idx; + primitive_id = mi.render_prim_offset + triangle_idx; - uint buf_offset = instance_buffer.model_instance_buf_offset[instance_id_curr]; - primitive_id = buf_offset + triangle_idx; - } - - triangle = 
get_instanced_triangle(primitive_id); + // If it's a static (non-animated) mesh, the vertices need to be transformed + if (mi.render_buffer_idx == mi.source_buffer_idx) + static_instance_idx = int(instance_id_curr); } - vec3 bary = visbuf_unpack_barycentrics(vis_buf.y); + Triangle triangle = load_and_transform_triangle(static_instance_idx, buffer_idx, primitive_id); + + vec3 bary; + bary.yz = texelFetch(TEX_PT_VISBUF_BARY_B, found_pos_prev, 0).xy; + bary.x = clamp(1.0 - bary.y - bary.z, 0.0, 1.0); // Reconstruct the position based on the barycentrics vec3 position = triangle.positions * bary; diff --git a/src/refresh/vkpt/shader/constants.h b/src/refresh/vkpt/shader/constants.h index a515854fe..75ba45b76 100644 --- a/src/refresh/vkpt/shader/constants.h +++ b/src/refresh/vkpt/shader/constants.h @@ -103,10 +103,11 @@ with this program; if not, write to the Free Software Foundation, Inc., #define ENVIRONMENT_STATIC 1 #define ENVIRONMENT_DYNAMIC 2 -#define SHADER_MAX_ENTITIES 1024 -#define SHADER_MAX_BSP_ENTITIES 128 -#define MAX_LIGHT_SOURCES 32 -#define MAX_LIGHT_STYLES 64 +#define MAX_MODEL_INSTANCES 8192 // MAX_ENTITIES * (some number of geometries per model, usually 1) +#define MAX_RESERVED_INSTANCES 16 // TLAS instances reserved for skinned geometry, particles and the like +#define MAX_TLAS_INSTANCES (MAX_MODEL_INSTANCES + MAX_RESERVED_INSTANCES) +#define MAX_LIGHT_SOURCES 32 +#define MAX_LIGHT_STYLES 64 #define TLAS_INDEX_GEOMETRY 0 #define TLAS_INDEX_EFFECTS 1 @@ -123,10 +124,6 @@ with this program; if not, write to the Free Software Foundation, Inc., // Effects TLAS flags #define AS_FLAG_EFFECTS (1 << 0) -#define AS_INSTANCE_FLAG_DYNAMIC (1 << 23) -#define AS_INSTANCE_FLAG_SKY (1 << 22) -#define AS_INSTANCE_MASK_OFFSET (AS_INSTANCE_FLAG_SKY - 1) - #define RT_PAYLOAD_GEOMETRY 0 #define RT_PAYLOAD_EFFECTS 1 diff --git a/src/refresh/vkpt/shader/global_textures.h b/src/refresh/vkpt/shader/global_textures.h index 3c96bdfb8..45a8cb307 100644 --- 
a/src/refresh/vkpt/shader/global_textures.h +++ b/src/refresh/vkpt/shader/global_textures.h @@ -78,66 +78,70 @@ with this program; if not, write to the Free Software Foundation, Inc., #define NUM_IMAGES_BASE 37 #define LIST_IMAGES_A_B \ - IMG_DO(PT_VISBUF_A, NUM_IMAGES_BASE + 0, R32G32_UINT, rg32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_VISBUF_B, NUM_IMAGES_BASE + 1, R32G32_UINT, rg32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_CLUSTER_A, NUM_IMAGES_BASE + 2, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_CLUSTER_B, NUM_IMAGES_BASE + 3, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_BASE_COLOR_A, NUM_IMAGES_BASE + 4, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_BASE_COLOR_B, NUM_IMAGES_BASE + 5, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_METALLIC_A, NUM_IMAGES_BASE + 6, R8G8_UNORM, rg8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_METALLIC_B, NUM_IMAGES_BASE + 7, R8G8_UNORM, rg8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_VIEW_DEPTH_A, NUM_IMAGES_BASE + 8, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_VIEW_DEPTH_B, NUM_IMAGES_BASE + 9, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_A, NUM_IMAGES_BASE + 10, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_B, NUM_IMAGES_BASE + 11, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL_A, NUM_IMAGES_BASE + 12, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL_B, NUM_IMAGES_BASE + 13, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_FILTERED_SPEC_A, NUM_IMAGES_BASE + 14, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_FILTERED_SPEC_B, NUM_IMAGES_BASE + 15, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_MOMENTS_HF_A, NUM_IMAGES_BASE + 16, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_MOMENTS_HF_B, NUM_IMAGES_BASE + 17, R16G16B16A16_SFLOAT, 
rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_A, NUM_IMAGES_BASE + 18, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ - IMG_DO(ASVGF_TAA_B, NUM_IMAGES_BASE + 19, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ - IMG_DO(ASVGF_RNG_SEED_A, NUM_IMAGES_BASE + 20, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_RNG_SEED_B, NUM_IMAGES_BASE + 21, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, NUM_IMAGES_BASE + 22, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, NUM_IMAGES_BASE + 23, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,NUM_IMAGES_BASE + 24, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,NUM_IMAGES_BASE + 25, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_A, NUM_IMAGES_BASE + 26, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_B, NUM_IMAGES_BASE + 27, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(PT_VISBUF_PRIM_A, NUM_IMAGES_BASE + 0, R32G32_UINT, rg32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VISBUF_PRIM_B, NUM_IMAGES_BASE + 1, R32G32_UINT, rg32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VISBUF_BARY_A, NUM_IMAGES_BASE + 2, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VISBUF_BARY_B, NUM_IMAGES_BASE + 3, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_CLUSTER_A, NUM_IMAGES_BASE + 4, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_CLUSTER_B, NUM_IMAGES_BASE + 5, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_BASE_COLOR_A, NUM_IMAGES_BASE + 6, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_BASE_COLOR_B, NUM_IMAGES_BASE + 7, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_METALLIC_A, NUM_IMAGES_BASE + 8, R8G8_UNORM, rg8, IMG_WIDTH_MGPU, 
IMG_HEIGHT ) \ + IMG_DO(PT_METALLIC_B, NUM_IMAGES_BASE + 9, R8G8_UNORM, rg8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VIEW_DEPTH_A, NUM_IMAGES_BASE + 10, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_VIEW_DEPTH_B, NUM_IMAGES_BASE + 11, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_A, NUM_IMAGES_BASE + 12, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_B, NUM_IMAGES_BASE + 13, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_A, NUM_IMAGES_BASE + 14, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_B, NUM_IMAGES_BASE + 15, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_A, NUM_IMAGES_BASE + 16, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_B, NUM_IMAGES_BASE + 17, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_A, NUM_IMAGES_BASE + 18, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_B, NUM_IMAGES_BASE + 19, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_TAA_A, NUM_IMAGES_BASE + 20, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ + IMG_DO(ASVGF_TAA_B, NUM_IMAGES_BASE + 21, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ + IMG_DO(ASVGF_RNG_SEED_A, NUM_IMAGES_BASE + 22, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_RNG_SEED_B, NUM_IMAGES_BASE + 23, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, NUM_IMAGES_BASE + 24, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, NUM_IMAGES_BASE + 25, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,NUM_IMAGES_BASE + 26, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,NUM_IMAGES_BASE + 27, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + 
IMG_DO(ASVGF_GRAD_SMPL_POS_A, NUM_IMAGES_BASE + 28, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_GRAD_SMPL_POS_B, NUM_IMAGES_BASE + 29, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ #define LIST_IMAGES_B_A \ - IMG_DO(PT_VISBUF_B, NUM_IMAGES_BASE + 0, R32G32_UINT, rg32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_VISBUF_A, NUM_IMAGES_BASE + 1, R32G32_UINT, rg32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_CLUSTER_B, NUM_IMAGES_BASE + 2, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_CLUSTER_A, NUM_IMAGES_BASE + 3, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_BASE_COLOR_B, NUM_IMAGES_BASE + 4, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_BASE_COLOR_A, NUM_IMAGES_BASE + 5, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_METALLIC_B, NUM_IMAGES_BASE + 6, R8G8_UNORM, rg8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_METALLIC_A, NUM_IMAGES_BASE + 7, R8G8_UNORM, rg8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_VIEW_DEPTH_B, NUM_IMAGES_BASE + 8, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_VIEW_DEPTH_A, NUM_IMAGES_BASE + 9, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_B, NUM_IMAGES_BASE + 10, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_NORMAL_A, NUM_IMAGES_BASE + 11, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL_B, NUM_IMAGES_BASE + 12, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(PT_GEO_NORMAL_A, NUM_IMAGES_BASE + 13, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_FILTERED_SPEC_B, NUM_IMAGES_BASE + 14, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_FILTERED_SPEC_A, NUM_IMAGES_BASE + 15, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_MOMENTS_HF_B, NUM_IMAGES_BASE + 16, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_MOMENTS_HF_A, NUM_IMAGES_BASE + 17, 
R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_TAA_B, NUM_IMAGES_BASE + 18, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ - IMG_DO(ASVGF_TAA_A, NUM_IMAGES_BASE + 19, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ - IMG_DO(ASVGF_RNG_SEED_B, NUM_IMAGES_BASE + 20, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_RNG_SEED_A, NUM_IMAGES_BASE + 21, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, NUM_IMAGES_BASE + 22, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, NUM_IMAGES_BASE + 23, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,NUM_IMAGES_BASE + 24, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,NUM_IMAGES_BASE + 25, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_B, NUM_IMAGES_BASE + 26, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - IMG_DO(ASVGF_GRAD_SMPL_POS_A, NUM_IMAGES_BASE + 27, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ - -#define NUM_IMAGES (NUM_IMAGES_BASE + 28) /* this really sucks but I don't know how to fix it + IMG_DO(PT_VISBUF_PRIM_B, NUM_IMAGES_BASE + 0, R32G32_UINT, rg32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VISBUF_PRIM_A, NUM_IMAGES_BASE + 1, R32G32_UINT, rg32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VISBUF_BARY_B, NUM_IMAGES_BASE + 2, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VISBUF_BARY_A, NUM_IMAGES_BASE + 3, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_CLUSTER_B, NUM_IMAGES_BASE + 4, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_CLUSTER_A, NUM_IMAGES_BASE + 5, R16_UINT, r16ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_BASE_COLOR_B, NUM_IMAGES_BASE + 6, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_BASE_COLOR_A, NUM_IMAGES_BASE + 7, R16G16B16A16_SFLOAT, 
rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_METALLIC_B, NUM_IMAGES_BASE + 8, R8G8_UNORM, rg8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_METALLIC_A, NUM_IMAGES_BASE + 9, R8G8_UNORM, rg8, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_VIEW_DEPTH_B, NUM_IMAGES_BASE + 10, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_VIEW_DEPTH_A, NUM_IMAGES_BASE + 11, R16_SFLOAT, r32f, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_B, NUM_IMAGES_BASE + 12, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_NORMAL_A, NUM_IMAGES_BASE + 13, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_B, NUM_IMAGES_BASE + 14, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(PT_GEO_NORMAL_A, NUM_IMAGES_BASE + 15, R32_UINT, r32ui, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_B, NUM_IMAGES_BASE + 16, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_FILTERED_SPEC_A, NUM_IMAGES_BASE + 17, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_B, NUM_IMAGES_BASE + 18, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_MOMENTS_HF_A, NUM_IMAGES_BASE + 19, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_TAA_B, NUM_IMAGES_BASE + 20, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ + IMG_DO(ASVGF_TAA_A, NUM_IMAGES_BASE + 21, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_TAA, IMG_HEIGHT_TAA ) \ + IMG_DO(ASVGF_RNG_SEED_B, NUM_IMAGES_BASE + 22, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_RNG_SEED_A, NUM_IMAGES_BASE + 23, R32_UINT, r32ui, IMG_WIDTH, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_B, NUM_IMAGES_BASE + 24, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_SH_A, NUM_IMAGES_BASE + 25, R16G16B16A16_SFLOAT, rgba16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_HIST_COLOR_LF_COCG_B,NUM_IMAGES_BASE + 26, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + 
IMG_DO(ASVGF_HIST_COLOR_LF_COCG_A,NUM_IMAGES_BASE + 27, R16G16_SFLOAT, rg16f, IMG_WIDTH_MGPU, IMG_HEIGHT ) \ + IMG_DO(ASVGF_GRAD_SMPL_POS_B, NUM_IMAGES_BASE + 28, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + IMG_DO(ASVGF_GRAD_SMPL_POS_A, NUM_IMAGES_BASE + 29, R32_UINT, r32ui, IMG_WIDTH_GRAD_MGPU, IMG_HEIGHT_GRAD) \ + +#define NUM_IMAGES (NUM_IMAGES_BASE + 30) /* this really sucks but I don't know how to fix it counting with enum does not work in GLSL */ // todo: make naming consistent! diff --git a/src/refresh/vkpt/shader/global_ubo.h b/src/refresh/vkpt/shader/global_ubo.h index 540d88bce..2a69fde8a 100644 --- a/src/refresh/vkpt/shader/global_ubo.h +++ b/src/refresh/vkpt/shader/global_ubo.h @@ -17,10 +17,11 @@ with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#ifndef _GLOBAL_UBO_DESCRIPTOR_SET_LAYOUT_H_ -#define _GLOBAL_UBO_DESCRIPTOR_SET_LAYOUT_H_ +#ifndef _GLOBAL_UBO_H_ +#define _GLOBAL_UBO_H_ #include "constants.h" +#include "shader_structs.h" #define GLOBAL_UBO_BINDING_IDX 0 #define GLOBAL_INSTANCE_BUFFER_BINDING_IDX 1 @@ -161,7 +162,7 @@ with this program; if not, write to the Free Software Foundation, Inc., \ GLOBAL_UBO_VAR_LIST_DO(int, num_sphere_lights) \ GLOBAL_UBO_VAR_LIST_DO(int , num_static_lights) \ - GLOBAL_UBO_VAR_LIST_DO(int, num_static_primitives) \ + GLOBAL_UBO_VAR_LIST_DO(uint, num_static_primitives) \ GLOBAL_UBO_VAR_LIST_DO(int, cluster_debug_index) \ \ GLOBAL_UBO_VAR_LIST_DO(int, water_normal_texture) \ @@ -225,134 +226,84 @@ with this program; if not, write to the Free Software Foundation, Inc., \ UBO_CVAR_LIST // WARNING: Do not put any other members into global_ubo after this: the CVAR list is not vec4-aligned -#define INSTANCE_BUFFER_VAR_LIST \ - INSTANCE_BUFFER_VAR_LIST_DO(int, model_indices [SHADER_MAX_ENTITIES + SHADER_MAX_BSP_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, model_current_to_prev [SHADER_MAX_ENTITIES]) \ - 
INSTANCE_BUFFER_VAR_LIST_DO(uint, model_prev_to_current [SHADER_MAX_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, world_current_to_prev [SHADER_MAX_BSP_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, world_prev_to_current [SHADER_MAX_BSP_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, bsp_prim_offset [SHADER_MAX_BSP_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, model_idx_offset [SHADER_MAX_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, model_cluster_id [SHADER_MAX_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, model_cluster_id_prev [SHADER_MAX_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, bsp_cluster_id [SHADER_MAX_BSP_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, bsp_cluster_id_prev [SHADER_MAX_BSP_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(ModelInstance, model_instances [SHADER_MAX_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(ModelInstance, model_instances_prev [SHADER_MAX_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(BspMeshInstance, bsp_mesh_instances [SHADER_MAX_BSP_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(BspMeshInstance, bsp_mesh_instances_prev [SHADER_MAX_BSP_ENTITIES]) \ - /* stores the offset into the instance buffer in numberof primitives */ \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, model_instance_buf_offset[SHADER_MAX_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, model_instance_buf_size [SHADER_MAX_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, bsp_instance_buf_offset [SHADER_MAX_BSP_ENTITIES]) \ - INSTANCE_BUFFER_VAR_LIST_DO(uint, bsp_instance_buf_size [SHADER_MAX_BSP_ENTITIES]) \ -#ifndef VKPT_SHADER - -#if SHADER_MAX_ENTITIES != MAX_ENTITIES -#error need to update constant here -#endif - -typedef uint32_t uvec4_t[4]; -typedef int ivec4_t[4]; -typedef uint32_t uint; - -typedef struct { - float M[16]; // mat4 - - uint32_t material; - int offset_curr; - int offset_prev; // matrix offset for IQM - float backlerp; +BEGIN_SHADER_STRUCT( ModelInstance ) +{ + mat4 transform; + mat4 transform_prev; + uint material; + int cluster; + uint 
source_buffer_idx; + uint prim_count; + + uint prim_offset_curr_pose_curr_frame; + uint prim_offset_prev_pose_curr_frame; + uint prim_offset_curr_pose_prev_frame; + uint prim_offset_prev_pose_prev_frame; + + float pose_lerp_curr_frame; + float pose_lerp_prev_frame; + int iqm_matrix_offset_curr_frame; + int iqm_matrix_offset_prev_frame; + + int frame; float alpha; - int idx_offset; - int model_index; - int is_iqm; -} ModelInstance; - -typedef struct { - float M[16]; - int frame; float padding[3]; -} BspMeshInstance; + uint render_buffer_idx; + uint render_prim_offset; +} +END_SHADER_STRUCT( ModelInstance ) -typedef struct ShaderFogVolume { - vec3_t mins; +BEGIN_SHADER_STRUCT( ShaderFogVolume ) +{ + vec3 mins; uint is_active; - vec3_t maxs; + vec3 maxs; float pad2; - vec3_t color; + vec3 color; float pad3; - vec4_t density; -} ShaderFogVolume_t; + vec4 density; +} +END_SHADER_STRUCT( ShaderFogVolume ) + +BEGIN_SHADER_STRUCT( InstanceBuffer ) +{ + uint animated_model_indices [MAX_MODEL_INSTANCES]; + uint model_current_to_prev [MAX_MODEL_INSTANCES]; + uint model_prev_to_current [MAX_MODEL_INSTANCES]; + ModelInstance model_instances [MAX_MODEL_INSTANCES]; + uint tlas_instance_prim_offsets[MAX_TLAS_INSTANCES]; + int tlas_instance_model_indices[MAX_TLAS_INSTANCES]; +} +END_SHADER_STRUCT( InstanceBuffer ) + + +#ifndef VKPT_SHADER -#define int_t int32_t typedef struct QVKUniformBuffer_s { -#define GLOBAL_UBO_VAR_LIST_DO(type, name) type##_t name; +#define GLOBAL_UBO_VAR_LIST_DO(type, name) type name; GLOBAL_UBO_VAR_LIST #undef GLOBAL_UBO_VAR_LIST_DO } QVKUniformBuffer_t; -typedef struct QVKInstanceBuffer_s { -#define INSTANCE_BUFFER_VAR_LIST_DO(type, name) type name; - INSTANCE_BUFFER_VAR_LIST -#undef INSTANCE_BUFFER_VAR_LIST_DO -} QVKInstanceBuffer_t; -#undef int_t - #else -struct ModelInstance { - mat4 M; - - uint material; - int offset_curr; - int offset_prev; // matrix offset for IQM - float backlerp; - - float alpha; - int idx_offset; - int model_index; - int is_iqm; 
-}; - -struct BspMeshInstance { - mat4 M; - ivec4 frame; -}; - -struct ShaderFogVolume { - vec3 mins; - uint is_active; - vec3 maxs; - float pad2; - vec3 color; - float pad3; - vec4 density; -}; - struct GlobalUniformBuffer { #define GLOBAL_UBO_VAR_LIST_DO(type, name) type name; GLOBAL_UBO_VAR_LIST #undef GLOBAL_UBO_VAR_LIST_DO }; -struct GlobalUniformInstanceBuffer { -#define INSTANCE_BUFFER_VAR_LIST_DO(type, name) type name; - INSTANCE_BUFFER_VAR_LIST -#undef INSTANCE_BUFFER_VAR_LIST_DO -}; - layout(set = GLOBAL_UBO_DESC_SET_IDX, binding = GLOBAL_UBO_BINDING_IDX, std140) uniform UBO { GlobalUniformBuffer global_ubo; }; -layout(set = GLOBAL_UBO_DESC_SET_IDX, binding = GLOBAL_INSTANCE_BUFFER_BINDING_IDX) readonly buffer InstanceUBO { - GlobalUniformInstanceBuffer instance_buffer; +layout(set = GLOBAL_UBO_DESC_SET_IDX, binding = GLOBAL_INSTANCE_BUFFER_BINDING_IDX) readonly buffer InstanceSSBO { + InstanceBuffer instance_buffer; }; #endif #undef UBO_CVAR_DO -#endif /*_GLOBAL_UBO_DESCRIPTOR_SET_LAYOUT_H_*/ +#endif /*_GLOBAL_UBO_H_*/ diff --git a/src/refresh/vkpt/shader/god_rays.comp b/src/refresh/vkpt/shader/god_rays.comp index 015511831..dd480e3ca 100644 --- a/src/refresh/vkpt/shader/god_rays.comp +++ b/src/refresh/vkpt/shader/god_rays.comp @@ -34,13 +34,13 @@ with this program; if not, write to the Free Software Foundation, Inc., #define GOD_RAYS_DESC_SET_IDX 0 #include "god_rays_shared.h" +#define GLOBAL_UBO_DESC_SET_IDX 2 +#include "global_ubo.h" + #define VERTEX_BUFFER_DESC_SET_IDX 1 #define VERTEX_READONLY 1 #include "vertex_buffer.h" -#define GLOBAL_UBO_DESC_SET_IDX 2 -#include "global_ubo.h" - #define GLOBAL_TEXTURES_DESC_SET_IDX 3 #include "global_textures.h" diff --git a/src/refresh/vkpt/shader/indirect_lighting.rgen b/src/refresh/vkpt/shader/indirect_lighting.rgen index 9efcb9b94..883643860 100644 --- a/src/refresh/vkpt/shader/indirect_lighting.rgen +++ b/src/refresh/vkpt/shader/indirect_lighting.rgen @@ -266,7 +266,7 @@ indirect_lighting( 
indirect_specular_weight /= max(1, cone_size * global_ubo.pt_specular_anti_flicker * 0.01); } - if(is_sky(ray_payload_geometry)) + if(is_sky(triangle.material_id)) { // On the first bounce, analytic lights are processed in the direct lighting pass, so // check if `pt_direct_polygon_lights` is non-negative: @@ -359,7 +359,7 @@ indirect_lighting( float spotlight = sqrt(max(0, -dot(bounce_direction, bounce_normal))); emissive *= spotlight; - // See the comment for `is_analytic_light` above under `if(is_sky(ray_payload_geometry))` + // See the comment for `is_analytic_light` above under `if(is_sky(triangle.material_id))` // Same logic applies. bool is_analytic_light = (bounce_material_id & MATERIAL_FLAG_LIGHT) != 0 && diff --git a/src/refresh/vkpt/shader/instance_geometry.comp b/src/refresh/vkpt/shader/instance_geometry.comp index b619ac8f1..41dd0ec10 100644 --- a/src/refresh/vkpt/shader/instance_geometry.comp +++ b/src/refresh/vkpt/shader/instance_geometry.comp @@ -37,182 +37,85 @@ with this program; if not, write to the Free Software Foundation, Inc., #define VERTEX_BUFFER_DESC_SET_IDX 1 #include "vertex_buffer.h" -#include "read_visbuf.glsl" - -layout(set = 2, binding = 0) readonly buffer MODEL_VBO { - uint data[]; -} model_vbos[]; - -uint get_model_uint(uint model_id, uint offset) -{ - return model_vbos[nonuniformEXT(model_id)].data[offset]; -} - -vec2 get_model_float2(uint model_id, uint offset) -{ - vec2 result; - result.x = uintBitsToFloat(model_vbos[nonuniformEXT(model_id)].data[offset + 0]); - result.y = uintBitsToFloat(model_vbos[nonuniformEXT(model_id)].data[offset + 1]); - return result; -} - -vec3 get_model_float3(uint model_id, uint offset) -{ - vec3 result; - result.x = uintBitsToFloat(model_vbos[nonuniformEXT(model_id)].data[offset + 0]); - result.y = uintBitsToFloat(model_vbos[nonuniformEXT(model_id)].data[offset + 1]); - result.z = uintBitsToFloat(model_vbos[nonuniformEXT(model_id)].data[offset + 2]); - return result; -} - -vec4 get_model_float4(uint 
model_id, uint offset) -{ - vec4 result; - result.x = uintBitsToFloat(model_vbos[nonuniformEXT(model_id)].data[offset + 0]); - result.y = uintBitsToFloat(model_vbos[nonuniformEXT(model_id)].data[offset + 1]); - result.z = uintBitsToFloat(model_vbos[nonuniformEXT(model_id)].data[offset + 2]); - result.w = uintBitsToFloat(model_vbos[nonuniformEXT(model_id)].data[offset + 3]); - return result; -} - Triangle -get_model_triangle(uint model_id, uint prim_id, uint idx_offset, uint vert_offset) +get_iqm_triangle(uint model_id, uint prim_id, int matrix_offset) { - uint idx_base = idx_offset + prim_id * 3; - - uvec3 idx; - idx.x = model_vbos[nonuniformEXT(model_id)].data[idx_base + 0]; - idx.y = model_vbos[nonuniformEXT(model_id)].data[idx_base + 1]; - idx.z = model_vbos[nonuniformEXT(model_id)].data[idx_base + 2]; - - idx = idx * MODEL_VERTEX_SIZE + vert_offset; - - Triangle t; - t.positions[0] = get_model_float3(model_id, idx[0] + MODEL_VERTEX_POSITION); - t.positions[1] = get_model_float3(model_id, idx[1] + MODEL_VERTEX_POSITION); - t.positions[2] = get_model_float3(model_id, idx[2] + MODEL_VERTEX_POSITION); - - t.normals[0] = get_model_float3(model_id, idx[0] + MODEL_VERTEX_NORMAL); - t.normals[1] = get_model_float3(model_id, idx[1] + MODEL_VERTEX_NORMAL); - t.normals[2] = get_model_float3(model_id, idx[2] + MODEL_VERTEX_NORMAL); - - t.tex_coords[0] = get_model_float2(model_id, idx[0] + MODEL_VERTEX_TEXCOORD); - t.tex_coords[1] = get_model_float2(model_id, idx[1] + MODEL_VERTEX_TEXCOORD); - t.tex_coords[2] = get_model_float2(model_id, idx[2] + MODEL_VERTEX_TEXCOORD); - - t.tangents[0] = vec3(0); // computed later - t.tangents[1] = vec3(0); - t.tangents[2] = vec3(0); - - t.material_id = 0; // needs to come from uniform buffer - - t.alpha = 1.0; - t.texel_density = 0; - - return t; -} - -vec3 compute_tangent(mat3x3 positions, mat3x2 tex_coords, mat3x3 normals, out bool handedness) -{ - vec3 dp0 = positions[1] - positions[0]; - vec3 dp1 = positions[2] - positions[0]; - vec2 
dt0 = tex_coords[1] - tex_coords[0]; - vec2 dt1 = tex_coords[2] - tex_coords[0]; - - float r = 1.f / (dt0.x * dt1.y - dt1.x * dt0.y); - - vec3 sdir = r * vec3( - dt1.y * dp0.x - dt0.y * dp1.x, - dt1.y * dp0.y - dt0.y * dp1.y, - dt1.y * dp0.z - dt0.y * dp1.z); - - vec3 tdir = r * vec3( - dt0.x * dp1.x - dt1.x * dp0.x, - dt0.x * dp1.y - dt1.x * dp0.y, - dt0.x * dp1.z - dt1.x * dp0.z); - - vec3 normal = (normals[0] + normals[1] + normals[2]) / 3.0; - - vec3 tangent = normalize(sdir - normal * dot(normal, sdir)); - - handedness = dot(cross(normal, tangent), tdir) < 0.0; - - return tangent; -} - -Triangle -get_iqm_triangle(uint model_id, uint prim_id, uint idx_offset, uint vert_offset, uint matrix_offset) -{ - uint idx_base = idx_offset + prim_id * 3; - - uvec3 idx; - idx.x = model_vbos[nonuniformEXT(model_id)].data[idx_base + 0]; - idx.y = model_vbos[nonuniformEXT(model_id)].data[idx_base + 1]; - idx.z = model_vbos[nonuniformEXT(model_id)].data[idx_base + 2]; - - idx = idx * IQM_VERTEX_SIZE + vert_offset; + VboPrimitive prim = primitive_buffers[nonuniformEXT(model_id)].primitives[prim_id]; Triangle t; - t.positions[0] = get_model_float3(model_id, idx[0] + IQM_VERTEX_POSITION); - t.positions[1] = get_model_float3(model_id, idx[1] + IQM_VERTEX_POSITION); - t.positions[2] = get_model_float3(model_id, idx[2] + IQM_VERTEX_POSITION); - - t.normals[0] = get_model_float3(model_id, idx[0] + IQM_VERTEX_NORMAL); - t.normals[1] = get_model_float3(model_id, idx[1] + IQM_VERTEX_NORMAL); - t.normals[2] = get_model_float3(model_id, idx[2] + IQM_VERTEX_NORMAL); - - t.tex_coords[0] = get_model_float2(model_id, idx[0] + IQM_VERTEX_TEXCOORD); - t.tex_coords[1] = get_model_float2(model_id, idx[1] + IQM_VERTEX_TEXCOORD); - t.tex_coords[2] = get_model_float2(model_id, idx[2] + IQM_VERTEX_TEXCOORD); - - t.tangents[0] = get_model_float3(model_id, idx[0] + IQM_VERTEX_TANGENT); - t.tangents[1] = get_model_float3(model_id, idx[1] + IQM_VERTEX_TANGENT); - t.tangents[2] = get_model_float3(model_id, 
idx[2] + IQM_VERTEX_TANGENT); - - if (dot(t.tangents[0], t.tangents[0]) == 0) // maybe tangents are missing? + t.positions[0] = prim.pos0; + t.positions[1] = prim.pos1; + t.positions[2] = prim.pos2; + + t.positions_prev = t.positions; + + t.normals[0] = decode_normal(prim.normals.x); + t.normals[1] = decode_normal(prim.normals.y); + t.normals[2] = decode_normal(prim.normals.z); + + t.tangents[0] = decode_normal(prim.tangents.x); + t.tangents[1] = decode_normal(prim.tangents.y); + t.tangents[2] = decode_normal(prim.tangents.z); + + t.tex_coords[0] = prim.uv0; + t.tex_coords[1] = prim.uv1; + t.tex_coords[2] = prim.uv2; + + t.material_id = prim.material_id; + t.cluster = prim.cluster; + t.instance_index = 0; + t.instance_prim = 0; + t.texel_density = prim.texel_density; + + vec2 emissive_and_alpha = unpackHalf2x16(prim.emissive_and_alpha); + t.emissive_factor = emissive_and_alpha.x; + t.alpha = emissive_and_alpha.y; + + for (int i_vtx = 0; i_vtx < 3; i_vtx++) { - bool handedness = false; - t.tangents[0].xyz = compute_tangent(t.positions, t.tex_coords, t.normals, handedness); - t.tangents[1].xyz = t.tangents[0].xyz; - t.tangents[2].xyz = t.tangents[0].xyz; - } + uint bone_indices; + uint bone_weights; + if (i_vtx == 0) + { + bone_indices = prim.custom0.x; + bone_weights = prim.custom0.y; + } + else if (i_vtx == 1) + { + bone_indices = prim.custom1.x; + bone_weights = prim.custom1.y; + } + else + { + bone_indices = prim.custom2.x; + bone_weights = prim.custom2.y; + } - if (matrix_offset >= 0) - { - for (int i_vtx = 0; i_vtx < 3; i_vtx++) + mat3x4 transform = mat3x4(0); + float weight_sum = 0; + + for (int i_bone = 0; i_bone < 4; i_bone++) { - mat3x4 transform = mat3x4(0); - uint bone_indices = get_model_uint(model_id, idx[i_vtx] + IQM_VERTEX_INDICES); - vec4 bone_weights = get_model_float4(model_id, idx[i_vtx] + IQM_VERTEX_WEIGHTS); - float weight_sum = 0; + uint bone_index = (bone_indices >> (i_bone * 8)) & 0xff; + float bone_weight = float((bone_weights >> (i_bone * 
8)) & 0xff); // skip normalization for precision - for (int i_bone = 0; i_bone < 4; i_bone++) + if (bone_weight > 0) { - uint bone_index = (bone_indices >> (i_bone * 8)) & 0xff; - float bone_weight = bone_weights[i_bone]; - - if (bone_weight > 0) - { - mat3x4 m = get_iqm_matrix(matrix_offset + bone_index); - transform += m * bone_weight; - weight_sum += bone_weight; - } + mat3x4 m = get_iqm_matrix(matrix_offset + bone_index); + transform += m * bone_weight; + weight_sum += bone_weight; } + } - if (weight_sum > 0) // should always sum up to 1 by IQM definition, but maybe the data is missing altogether? - { - t.positions[i_vtx] = vec4(t.positions[i_vtx], 1.0) * transform; - t.normals[i_vtx] = normalize(vec4(t.normals[i_vtx], 0.0) * transform); - t.tangents[i_vtx] = normalize(vec4(t.tangents[i_vtx], 0.0) * transform); - } + if (weight_sum > 0) // should always sum up to 1 by IQM definition, but maybe the data is missing altogether? + { + float weight_sum_rcp = 1.0 / weight_sum; + t.positions[i_vtx] = (vec4(t.positions[i_vtx], 1.0) * weight_sum_rcp) * transform; + t.normals[i_vtx] = normalize(vec4(t.normals[i_vtx], 0.0) * transform); + t.tangents[i_vtx] = normalize(vec4(t.tangents[i_vtx], 0.0) * transform); } } - t.material_id = 0; // needs to come from uniform buffer - - t.alpha = 1.0; - t.texel_density = 0; - return t; } @@ -222,173 +125,88 @@ layout(local_size_x = LOCAL_SIZE_X, local_size_y = 1, local_size_z = 1) in; void main() -{ - uint instance_id = gl_WorkGroupID.x; - - int model_index = instance_buffer.model_indices[instance_id]; +{ + uint instance_index = instance_buffer.animated_model_indices[gl_WorkGroupID.x]; - bool is_world = (model_index & 0x80000000) != 0; - uint buf_offset, num_triangles; + ModelInstance mi = instance_buffer.model_instances[instance_index]; - if(is_world) + for(uint idx = gl_LocalInvocationID.x; idx < mi.prim_count; idx += LOCAL_SIZE_X) { - instance_id = ~model_index; - buf_offset = instance_buffer.bsp_instance_buf_offset[instance_id]; 
- num_triangles = instance_buffer.bsp_instance_buf_size[instance_id]; - } - else - { - instance_id = model_index; - buf_offset = instance_buffer.model_instance_buf_offset[instance_id]; - num_triangles = instance_buffer.model_instance_buf_size[instance_id]; - } + Triangle t; - for(uint idx = gl_LocalInvocationID.x; idx < num_triangles; idx += LOCAL_SIZE_X) - { - Triangle t_i; - mat4 M_curr = mat4(1.0); - mat4 M_prev = mat4(1.0); - - if(is_world) { - uint id = instance_id; - Triangle t = get_bsp_triangle(idx + instance_buffer.bsp_prim_offset[id]); - M_curr = instance_buffer.bsp_mesh_instances[id].M; - uint id_prev = instance_buffer.world_current_to_prev[id]; - M_prev = instance_buffer.bsp_mesh_instances_prev[id_prev].M; - - t_i.positions = t.positions; - t_i.positions_prev = t.positions; /* no vertex anim for bsp meshes */ - t_i.tangents = t.tangents; - t_i.normals = t.normals; - t_i.tex_coords = t.tex_coords; - t_i.texel_density = t.texel_density; - t_i.cluster = instance_buffer.bsp_cluster_id[id]; - - int frame = instance_buffer.bsp_mesh_instances[id].frame.x; - if(frame > 0) - { - uint material = t.material_id; - MaterialInfo minfo = get_material_info(material); - frame = frame % int(minfo.num_frames); + // See if the instance is skinned. + if (mi.iqm_matrix_offset_curr_frame >= 0) + { + // Perform bone-based skinning for two frames: + // Current frame... + t = get_iqm_triangle(mi.source_buffer_idx, + idx + mi.prim_offset_curr_pose_curr_frame, mi.iqm_matrix_offset_curr_frame); - while(frame --> 0) { - material = minfo.next_frame; - minfo = get_material_info(material); - } + // ... 
previous frame + Triangle t_prev = get_iqm_triangle(mi.source_buffer_idx, + idx + mi.prim_offset_curr_pose_prev_frame, mi.iqm_matrix_offset_prev_frame); - t_i.material_id = material | (t.material_id & ~MATERIAL_INDEX_MASK); // preserve flags - } - else - { - t_i.material_id = t.material_id; - } + // Use only the triangle positions from the previous frame + t.positions_prev = t_prev.positions; } - else { /* model */ - ModelInstance mi_curr = instance_buffer.model_instances[instance_id]; - M_curr = mi_curr.M; + else // Not skinned, so vertex animation. + { + // Interpolate vertex animations for two frames, from two poses in each frame + Triangle t_a_curr = load_triangle(mi.source_buffer_idx, idx + mi.prim_offset_curr_pose_curr_frame); + Triangle t_b_curr = load_triangle(mi.source_buffer_idx, idx + mi.prim_offset_prev_pose_curr_frame); + Triangle t_a_prev = load_triangle(mi.source_buffer_idx, idx + mi.prim_offset_curr_pose_prev_frame); + Triangle t_b_prev = load_triangle(mi.source_buffer_idx, idx + mi.prim_offset_prev_pose_prev_frame); - uint id_prev = instance_buffer.model_current_to_prev[instance_id]; - ModelInstance mi_prev = mi_curr; - if(id_prev != ~0u) + for (int vtx = 0; vtx < 3; vtx++) { - mi_prev = instance_buffer.model_instances_prev[id_prev]; - } + // Current position + t.positions[vtx] = mix(t_a_curr.positions[vtx], t_b_curr.positions[vtx], mi.pose_lerp_curr_frame); + t.positions[vtx] = vec3(mi.transform * vec4(t.positions[vtx], 1.0)); + // Previous position + t.positions_prev[vtx] = mix(t_a_prev.positions[vtx], t_b_prev.positions[vtx], mi.pose_lerp_prev_frame); + t.positions_prev[vtx] = vec3(mi.transform_prev * vec4(t.positions_prev[vtx], 1.0)); - if (mi_curr.is_iqm != 0) - { - t_i = get_iqm_triangle(mi_curr.model_index, idx, mi_curr.idx_offset, mi_curr.offset_curr, mi_curr.offset_prev); - - if (id_prev != ~0u) - { - Triangle t_prev = get_iqm_triangle(mi_prev.model_index, idx, mi_prev.idx_offset, mi_prev.offset_curr, mi_prev.offset_prev); - 
t_i.positions_prev = t_prev.positions; - M_prev = mi_prev.M; - } - else - { - t_i.positions_prev = t_i.positions; - M_prev = M_curr; - } - - t_i.alpha = mi_curr.alpha.x; - t_i.material_id = mi_curr.material; - // t_i.material_id |= MATERIAL_FLAG_HANDEDNESS; // not sure - t_i.cluster = instance_buffer.model_cluster_id[instance_id]; - } - else - { - { - /* read and interpolate triangles for model for _current_ frame */ - Triangle t = get_model_triangle(mi_curr.model_index, idx, mi_curr.idx_offset, mi_curr.offset_curr); - Triangle t_prev = get_model_triangle(mi_curr.model_index, idx, mi_curr.idx_offset, mi_curr.offset_prev); - - float backlerp = mi_curr.backlerp; - - t_i.positions[0] = mix(t.positions[0], t_prev.positions[0], backlerp); - t_i.positions[1] = mix(t.positions[1], t_prev.positions[1], backlerp); - t_i.positions[2] = mix(t.positions[2], t_prev.positions[2], backlerp); - - t_i.normals[0] = mix(t.normals[0], t_prev.normals[0], backlerp); - t_i.normals[1] = mix(t.normals[1], t_prev.normals[1], backlerp); - t_i.normals[2] = mix(t.normals[2], t_prev.normals[2], backlerp); - - t_i.tex_coords = t.tex_coords; - - bool handedness = false; - t_i.tangents[0].xyz = compute_tangent(t_i.positions, t_i.tex_coords, t_i.normals, handedness); - t_i.tangents[1].xyz = t_i.tangents[0].xyz; - t_i.tangents[2].xyz = t_i.tangents[0].xyz; - - t_i.alpha = mi_curr.alpha.x; - t_i.texel_density = t.texel_density; - - t_i.material_id = mi_curr.material; - t_i.material_id |= handedness ? 
MATERIAL_FLAG_HANDEDNESS : 0; - t_i.cluster = instance_buffer.model_cluster_id[instance_id]; - } - - if(id_prev != ~0u) - { - /* read and interpolate triangles for model for _previous_ frame */ - Triangle t = get_model_triangle(mi_prev.model_index, idx, mi_prev.idx_offset, mi_prev.offset_curr); - Triangle t_prev = get_model_triangle(mi_prev.model_index, idx, mi_prev.idx_offset, mi_prev.offset_prev); - M_prev = mi_prev.M; - - float backlerp = mi_prev.backlerp; - - t_i.positions_prev[0] = mix(t.positions[0], t_prev.positions[0], backlerp); - t_i.positions_prev[1] = mix(t.positions[1], t_prev.positions[1], backlerp); - t_i.positions_prev[2] = mix(t.positions[2], t_prev.positions[2], backlerp); - } - else - { - M_prev = M_curr; - - t_i.positions_prev[0] = t_i.positions[0]; - t_i.positions_prev[1] = t_i.positions[1]; - t_i.positions_prev[2] = t_i.positions[2]; - } + // Current normal + t.normals[vtx] = mix(t_a_curr.normals[vtx], t_b_curr.normals[vtx], mi.pose_lerp_curr_frame); + t.normals[vtx] = normalize(vec3(mi.transform * vec4(t.normals[vtx], 0.0))); + + // Current tangent + t.tangents[vtx] = mix(t_a_curr.tangents[vtx], t_b_curr.tangents[vtx], mi.pose_lerp_curr_frame); + t.tangents[vtx] = normalize(vec3(mi.transform * vec4(t.tangents[vtx], 0.0))); + + // Current texture coordinates - assume no texcoord animation + t.tex_coords[vtx] = t_a_curr.tex_coords[vtx]; } + + t.material_id = t_a_curr.material_id; + t.texel_density = t_a_curr.texel_density; + t.emissive_factor = t_a_curr.emissive_factor; + t.alpha = t_a_curr.alpha; } - t_i.positions[0] = vec3(M_curr * vec4(t_i.positions[0], 1.0)); - t_i.positions[1] = vec3(M_curr * vec4(t_i.positions[1], 1.0)); - t_i.positions[2] = vec3(M_curr * vec4(t_i.positions[2], 1.0)); + t.cluster = mi.cluster; + t.alpha *= mi.alpha; + t.instance_index = instance_index; + if (mi.material != 0) + t.material_id = mi.material; - t_i.positions_prev[0] = vec3(M_prev * vec4(t_i.positions_prev[0], 1.0)); - t_i.positions_prev[1] = vec3(M_prev * 
vec4(t_i.positions_prev[1], 1.0)); - t_i.positions_prev[2] = vec3(M_prev * vec4(t_i.positions_prev[2], 1.0)); + // Apply frame-based material animation: go through the linked list of materials. + int frame = mi.frame; + if (frame > 0) + { + uint material = t.material_id; + MaterialInfo minfo = get_material_info(material); + frame = frame % int(minfo.num_frames); - t_i.normals[0] = vec3(M_curr * vec4(t_i.normals[0], 0.0)); - t_i.normals[1] = vec3(M_curr * vec4(t_i.normals[1], 0.0)); - t_i.normals[2] = vec3(M_curr * vec4(t_i.normals[2], 0.0)); + while (frame --> 0) { + material = minfo.next_frame; + minfo = get_material_info(material); + } - t_i.tangents[0] = vec3(M_curr * vec4(t_i.tangents[0], 0.0)); - t_i.tangents[1] = vec3(M_curr * vec4(t_i.tangents[1], 0.0)); - t_i.tangents[2] = vec3(M_curr * vec4(t_i.tangents[2], 0.0)); + t.material_id = material | (t.material_id & ~MATERIAL_INDEX_MASK); // preserve flags + } - uint instance_triangle_id = visbuf_pack_instance(instance_id, idx, is_world); - store_instanced_triangle(t_i, instance_triangle_id, idx + buf_offset); + store_triangle(t, mi.render_buffer_idx, idx + mi.render_prim_offset); } } diff --git a/src/refresh/vkpt/shader/path_tracer.h b/src/refresh/vkpt/shader/path_tracer.h index b5bd30502..309d4ff56 100644 --- a/src/refresh/vkpt/shader/path_tracer.h +++ b/src/refresh/vkpt/shader/path_tracer.h @@ -134,22 +134,22 @@ Converting skyboxes to local lights provides two benefits: #define gl_RayFlagsSkipProceduralPrimitives 0x200 // not defined in GLSL #define INSTANCE_DYNAMIC_FLAG (1u << 31) -#define INSTANCE_SKY_FLAG (1u << 30) -#define PRIM_ID_MASK (~(INSTANCE_DYNAMIC_FLAG | INSTANCE_SKY_FLAG)) +#define PRIM_ID_MASK (~INSTANCE_DYNAMIC_FLAG) #define GLOBAL_UBO_DESC_SET_IDX 1 #include "global_ubo.h" layout (push_constant) uniform push_constant_block { - int gpu_index; - int bounce_index; + int gpu_index; + int bounce_index; } push_constants; struct RayPayloadGeometry { - vec2 barycentric; - uint instance_prim; - float 
hit_distance; + vec2 barycentric; + int buffer_and_instance_idx; + uint primitive_id; + float hit_distance; }; struct RayPayloadEffects { diff --git a/src/refresh/vkpt/shader/path_tracer.rchit b/src/refresh/vkpt/shader/path_tracer.rchit index ef1722e3a..80d23b9d9 100644 --- a/src/refresh/vkpt/shader/path_tracer.rchit +++ b/src/refresh/vkpt/shader/path_tracer.rchit @@ -40,5 +40,5 @@ hitAttributeEXT vec2 hit_attribs; void main() { - pt_logic_rchit(ray_payload, gl_PrimitiveID, gl_InstanceCustomIndexEXT, gl_HitTEXT, hit_attribs.xy); + pt_logic_rchit(ray_payload, gl_PrimitiveID, gl_InstanceID, gl_GeometryIndexEXT, gl_InstanceCustomIndexEXT, gl_HitTEXT, hit_attribs.xy); } diff --git a/src/refresh/vkpt/shader/path_tracer_explosion.rahit b/src/refresh/vkpt/shader/path_tracer_explosion.rahit index 679316d5e..fabef6fd6 100644 --- a/src/refresh/vkpt/shader/path_tracer_explosion.rahit +++ b/src/refresh/vkpt/shader/path_tracer_explosion.rahit @@ -37,7 +37,7 @@ hitAttributeEXT vec2 hit_attribs; void main() { - vec4 transparency = pt_logic_explosion(gl_PrimitiveID, gl_InstanceCustomIndexEXT, gl_WorldRayDirectionEXT, hit_attribs.xy); + vec4 transparency = pt_logic_explosion(gl_PrimitiveID, gl_InstanceID, gl_InstanceCustomIndexEXT, gl_WorldRayDirectionEXT, hit_attribs.xy); if (transparency.a > 0) { diff --git a/src/refresh/vkpt/shader/path_tracer_hit_shaders.h b/src/refresh/vkpt/shader/path_tracer_hit_shaders.h index 5a9fbd443..b544866d7 100644 --- a/src/refresh/vkpt/shader/path_tracer_hit_shaders.h +++ b/src/refresh/vkpt/shader/path_tracer_hit_shaders.h @@ -29,29 +29,43 @@ uniform utextureBuffer sprite_texure_buffer; layout(set = 0, binding = 4) uniform utextureBuffer beam_info_buffer; -void pt_logic_rchit(inout RayPayloadGeometry ray_payload, int primitiveID, uint instanceCustomIndex, float hitT, vec2 bary) +void get_model_index_and_prim_offset(int instanceID, int geometryIndex, out int model_index, out uint prim_offset) { - ray_payload.barycentric = bary.xy; - 
ray_payload.instance_prim = primitiveID + instanceCustomIndex & AS_INSTANCE_MASK_OFFSET; - if((instanceCustomIndex & AS_INSTANCE_FLAG_DYNAMIC) != 0) + model_index = instance_buffer.tlas_instance_model_indices[instanceID]; + if (model_index >= 0) { - ray_payload.instance_prim |= INSTANCE_DYNAMIC_FLAG; + model_index += geometryIndex; + prim_offset = instance_buffer.model_instances[model_index].render_prim_offset; } - if((instanceCustomIndex & AS_INSTANCE_FLAG_SKY) != 0) + else { - ray_payload.instance_prim |= INSTANCE_SKY_FLAG; + prim_offset = instance_buffer.tlas_instance_prim_offsets[instanceID]; } - ray_payload.hit_distance = hitT; } -bool pt_logic_masked(int primitiveID, uint instanceCustomIndex, vec2 bary) +void pt_logic_rchit(inout RayPayloadGeometry ray_payload, int primitiveID, int instanceID, int geometryIndex, uint instanceCustomIndex, float hitT, vec2 bary) { - Triangle triangle; - uint prim = primitiveID + instanceCustomIndex & AS_INSTANCE_MASK_OFFSET; - if ((instanceCustomIndex & AS_INSTANCE_FLAG_DYNAMIC) != 0) - triangle = get_instanced_triangle(prim); - else - triangle = get_bsp_triangle(prim); + int model_index; + uint prim_offset; + get_model_index_and_prim_offset(instanceID, geometryIndex, model_index, prim_offset); + + ray_payload.barycentric = bary.xy; + ray_payload.primitive_id = primitiveID + prim_offset; + ray_payload.buffer_and_instance_idx = (int(instanceCustomIndex) & 0xffff) + | (model_index << 16); + ray_payload.hit_distance = hitT; +} + +bool pt_logic_masked(int primitiveID, int instanceID, int geometryIndex, uint instanceCustomIndex, vec2 bary) +{ + int model_index; + uint prim_offset; + get_model_index_and_prim_offset(instanceID, geometryIndex, model_index, prim_offset); + + uint prim = primitiveID + prim_offset; + uint buffer_idx = instanceCustomIndex; + + Triangle triangle = load_and_transform_triangle(model_index, buffer_idx, prim); MaterialInfo minfo = get_material_info(triangle.material_id); @@ -153,10 +167,15 @@ vec4 
pt_logic_sprite(int primitiveID, vec2 bary) return color; } -vec4 pt_logic_explosion(int primitiveID, uint instanceCustomIndex, vec3 worldRayDirection, vec2 bary) +vec4 pt_logic_explosion(int primitiveID, int instanceID, uint instanceCustomIndex, vec3 worldRayDirection, vec2 bary) { - const uint primitive_id = primitiveID + instanceCustomIndex & AS_INSTANCE_MASK_OFFSET; - const Triangle triangle = get_instanced_triangle(primitive_id); + // NOTE: The explosions use a different primitive addressing scheme from the other geometry. + // This is because the other geometry lives in the geometry TLAS, and the explosions are in the effects TLAS, + // which makes the instanceID values point to wrong entries in the tlas_instance_model_indices array. + // So the buffer index is fixed, and the prim offset is stored in instanceCustomIndex here. + const uint primitive_id = primitiveID + instanceCustomIndex; + const uint buffer_idx = VERTEX_BUFFER_INSTANCED; + const Triangle triangle = load_triangle(buffer_idx, primitive_id); const vec3 barycentric = vec3(1.0 - bary.x - bary.y, bary.x, bary.y); const vec2 tex_coord = triangle.tex_coords * barycentric; diff --git a/src/refresh/vkpt/shader/path_tracer_masked.rahit b/src/refresh/vkpt/shader/path_tracer_masked.rahit index a2be69621..03fcbddc0 100644 --- a/src/refresh/vkpt/shader/path_tracer_masked.rahit +++ b/src/refresh/vkpt/shader/path_tracer_masked.rahit @@ -36,6 +36,6 @@ hitAttributeEXT vec2 hit_attribs; void main() { - if (!pt_logic_masked(gl_PrimitiveID, gl_InstanceCustomIndexEXT, hit_attribs.xy)) + if (!pt_logic_masked(gl_PrimitiveID, gl_InstanceID, gl_GeometryIndexEXT, gl_InstanceCustomIndexEXT, hit_attribs.xy)) ignoreIntersectionEXT; } diff --git a/src/refresh/vkpt/shader/path_tracer_rgen.h b/src/refresh/vkpt/shader/path_tracer_rgen.h index bfdb2ff07..09c04748d 100644 --- a/src/refresh/vkpt/shader/path_tracer_rgen.h +++ b/src/refresh/vkpt/shader/path_tracer_rgen.h @@ -33,7 +33,6 @@ uniform accelerationStructureEXT 
topLevelAS[TLAS_COUNT]; #define VERTEX_READONLY 1 #include "vertex_buffer.h" -#include "read_visbuf.glsl" #include "asvgf.glsl" #include "brdf.glsl" #include "water.glsl" @@ -148,35 +147,16 @@ ivec2 get_image_size() bool found_intersection(RayPayloadGeometry rp) { - return rp.instance_prim != ~0u; -} - -bool -is_sky(RayPayloadGeometry rp) -{ - return (rp.instance_prim & INSTANCE_SKY_FLAG) != 0; -} - -bool -is_dynamic_instance(RayPayloadGeometry pay_load) -{ - return (pay_load.instance_prim & INSTANCE_DYNAMIC_FLAG) > 0; -} - -uint -get_primitive(RayPayloadGeometry pay_load) -{ - return pay_load.instance_prim & PRIM_ID_MASK; + return rp.primitive_id != ~0u; } Triangle get_hit_triangle(RayPayloadGeometry rp) { - uint prim = get_primitive(rp); - - return is_dynamic_instance(rp) - ? get_instanced_triangle(prim) - : get_bsp_triangle(prim); + return load_and_transform_triangle( + /* instance_idx = */ rp.buffer_and_instance_idx >> 16, + /* buffer_idx = */ rp.buffer_and_instance_idx & 0xffff, + rp.primitive_id); } vec3 @@ -236,6 +216,13 @@ is_chrome(uint material) return kind == MATERIAL_KIND_CHROME || kind == MATERIAL_KIND_CHROME_MODEL; } +bool +is_sky(uint material) +{ + uint kind = material & MATERIAL_KIND_MASK; + return kind == MATERIAL_KIND_SKY; +} + bool is_screen(uint material) { @@ -263,7 +250,8 @@ trace_geometry_ray(Ray ray, bool cull_back_faces, int instance_mask) rayFlags |= gl_RayFlagsSkipProceduralPrimitives; ray_payload_geometry.barycentric = vec2(0); - ray_payload_geometry.instance_prim = ~0u; + ray_payload_geometry.primitive_id = ~0u; + ray_payload_geometry.buffer_and_instance_idx = 0; ray_payload_geometry.hit_distance = 0; #ifdef KHR_RAY_QUERY @@ -277,6 +265,8 @@ trace_geometry_ray(Ray ray, bool cull_back_faces, int instance_mask) { uint sbtOffset = rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(rayQuery, false); int primitiveID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, false); + int instanceID = 
rayQueryGetIntersectionInstanceIdEXT(rayQuery, false); + int geometryIndex = rayQueryGetIntersectionGeometryIndexEXT(rayQuery, false); uint instanceCustomIndex = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, false); float hitT = rayQueryGetIntersectionTEXT(rayQuery, false); vec2 bary = rayQueryGetIntersectionBarycentricsEXT(rayQuery, false); @@ -285,7 +275,7 @@ trace_geometry_ray(Ray ray, bool cull_back_faces, int instance_mask) switch(sbtOffset) { case SBTO_MASKED: - if (pt_logic_masked(primitiveID, instanceCustomIndex, bary)) + if (pt_logic_masked(primitiveID, instanceID, geometryIndex, instanceCustomIndex, bary)) rayQueryConfirmIntersectionEXT(rayQuery); break; } @@ -295,6 +285,8 @@ trace_geometry_ray(Ray ray, bool cull_back_faces, int instance_mask) { pt_logic_rchit(ray_payload_geometry, rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true), + rayQueryGetIntersectionInstanceIdEXT(rayQuery, true), + rayQueryGetIntersectionGeometryIndexEXT(rayQuery, true), rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true), rayQueryGetIntersectionTEXT(rayQuery, true), rayQueryGetIntersectionBarycentricsEXT(rayQuery, true)); @@ -396,6 +388,7 @@ trace_effects_ray(Ray ray, bool skip_procedural) { uint sbtOffset = rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(rayQuery, false); int primitiveID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, false); + int instanceID = rayQueryGetIntersectionInstanceIdEXT(rayQuery, false); uint instanceCustomIndex = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, false); float hitT = rayQueryGetIntersectionTEXT(rayQuery, false); vec2 bary = rayQueryGetIntersectionBarycentricsEXT(rayQuery, false); @@ -433,7 +426,7 @@ trace_effects_ray(Ray ray, bool skip_procedural) break; case SBTO_EXPLOSION: // explosions - transparent = pt_logic_explosion(primitiveID, instanceCustomIndex, ray.direction, bary); + transparent = pt_logic_explosion(primitiveID, instanceID, instanceCustomIndex, ray.direction, bary); 
break; case SBTO_SPRITE: // sprites @@ -493,13 +486,15 @@ trace_shadow_ray(Ray ray, int cull_mask) { uint sbtOffset = rayQueryGetIntersectionInstanceShaderBindingTableRecordOffsetEXT(rayQuery, false); int primitiveID = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, false); + int instanceID = rayQueryGetIntersectionInstanceIdEXT(rayQuery, false); + int geometryIndex = rayQueryGetIntersectionGeometryIndexEXT(rayQuery, false); uint instanceCustomIndex = rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, false); vec2 bary = rayQueryGetIntersectionBarycentricsEXT(rayQuery, false); bool isProcedural = rayQueryGetIntersectionTypeEXT(rayQuery, false) == gl_RayQueryCandidateIntersectionAABBEXT; if (!isProcedural && sbtOffset == SBTO_MASKED) { - if (pt_logic_masked(primitiveID, instanceCustomIndex, bary)) + if (pt_logic_masked(primitiveID, instanceID, geometryIndex, instanceCustomIndex, bary)) rayQueryConfirmIntersectionEXT(rayQuery); } } @@ -512,7 +507,8 @@ trace_shadow_ray(Ray ray, int cull_mask) #else ray_payload_geometry.barycentric = vec2(0); - ray_payload_geometry.instance_prim = ~0u; + ray_payload_geometry.primitive_id = ~0u; + ray_payload_geometry.buffer_and_instance_idx = 0; ray_payload_geometry.hit_distance = -1; traceRayEXT( topLevelAS[TLAS_INDEX_GEOMETRY], rayFlags, cull_mask, @@ -528,7 +524,8 @@ vec3 trace_caustic_ray(Ray ray, int surface_medium) { ray_payload_geometry.barycentric = vec2(0); - ray_payload_geometry.instance_prim = ~0u; + ray_payload_geometry.primitive_id = ~0u; + ray_payload_geometry.buffer_and_instance_idx = 0; ray_payload_geometry.hit_distance = -1; @@ -547,6 +544,8 @@ trace_caustic_ray(Ray ray, int surface_medium) { pt_logic_rchit(ray_payload_geometry, rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, true), + rayQueryGetIntersectionInstanceIdEXT(rayQuery, true), + rayQueryGetIntersectionGeometryIndexEXT(rayQuery, true), rayQueryGetIntersectionInstanceCustomIndexEXT(rayQuery, true), rayQueryGetIntersectionTEXT(rayQuery, true), 
rayQueryGetIntersectionBarycentricsEXT(rayQuery, true)); @@ -606,6 +605,10 @@ trace_caustic_ray(Ray ray, int surface_medium) throughput = base_color; } + else + { + throughput = vec3(clamp(1.0 - triangle.alpha, 0.0, 1.0)); + } } //return vec3(caustic); diff --git a/src/refresh/vkpt/shader/primary_rays.rgen b/src/refresh/vkpt/shader/primary_rays.rgen index cc06f0174..7f5de35e0 100644 --- a/src/refresh/vkpt/shader/primary_rays.rgen +++ b/src/refresh/vkpt/shader/primary_rays.rgen @@ -158,8 +158,13 @@ main() trace_geometry_ray(ray, true, cull_mask); } + Triangle triangle; + if (found_intersection(ray_payload_geometry)) + { ray.t_max = ray_payload_geometry.hit_distance; + triangle = get_hit_triangle(ray_payload_geometry); + } vec4 effects = trace_effects_ray(ray, /* skip_procedural = */ false); @@ -167,7 +172,7 @@ main() // If the primary ray didn't hit anything, or it hit a sky polygon and pt_show_sky is disabled, // store the sky color and motion vectors. Doesn't apply to gradient samples because their rays intentionally miss. 
- if((!found_intersection(ray_payload_geometry) || (is_sky(ray_payload_geometry) && (global_ubo.pt_show_sky == 0)))) + if(!found_intersection(ray_payload_geometry) || is_sky(triangle.material_id) && (global_ubo.pt_show_sky == 0)) { vec3 env = env_map(ray.direction, false); env *= global_ubo.pt_env_scale; @@ -195,32 +200,21 @@ main() imageStore(IMG_PT_VIEW_DIRECTION, ipos, vec4(direction, 0)); imageStore(IMG_PT_SHADING_POSITION, ipos, vec4(global_ubo.cam_pos.xyz + direction * PRIMARY_RAY_T_MAX, 0)); imageStore(IMG_PT_MOTION, ipos, vec4(motion, 0, 0)); - imageStore(IMG_PT_VISBUF_A, ipos, uvec4(0)); + imageStore(IMG_PT_VISBUF_PRIM_A, ipos, uvec4(0)); + imageStore(IMG_PT_VISBUF_BARY_A, ipos, vec4(0)); imageStore(IMG_PT_BASE_COLOR_A, ipos, vec4(0)); imageStore(IMG_PT_TRANSPARENT, ipos, transparent); return; } - Triangle triangle; - vec3 bary; + vec3 bary = get_hit_barycentric(ray_payload_geometry); { - bool is_dynamic_primitive = is_dynamic_instance(ray_payload_geometry); - uint primitive_id = get_primitive(ray_payload_geometry); - bary = get_hit_barycentric(ray_payload_geometry); - uvec2 vis_buf; - vis_buf.x = is_dynamic_primitive - ? 
get_instance_id_instanced(primitive_id) - : visbuf_pack_static_prim(primitive_id); - vis_buf.y = visbuf_pack_barycentrics(bary); - - imageStore(IMG_PT_VISBUF_A, ipos, uvec4(vis_buf, 0, 0)); - - if(is_dynamic_primitive) - triangle = get_instanced_triangle(primitive_id); - else - triangle = get_bsp_triangle(primitive_id); + vis_buf.x = triangle.instance_index; + vis_buf.y = triangle.instance_prim; + imageStore(IMG_PT_VISBUF_PRIM_A, ipos, uvec4(vis_buf, 0, 0)); + imageStore(IMG_PT_VISBUF_BARY_A, ipos, vec4(bary.yz, 0, 0)); } if(is_readback_pixel) @@ -317,7 +311,7 @@ main() uint material_id = triangle.material_id; - if((is_chrome(material_id) || is_screen(material_id) || is_camera(material_id)) && primary_roughness >= MAX_MIRROR_ROUGHNESS) + if((is_chrome(material_id) || is_screen(material_id) || is_camera(material_id)) && primary_roughness >= MAX_MIRROR_ROUGHNESS || is_transparent(material_id)) { material_id = (material_id & ~MATERIAL_KIND_MASK) | MATERIAL_KIND_REGULAR; } @@ -354,7 +348,6 @@ main() } } - bool primary_is_transparent = is_transparent(material_id); bool primary_is_screen = is_screen(material_id); if(is_screen(material_id) && luminance(primary_emissive) > 0) @@ -380,10 +373,23 @@ main() } } - if(is_transparent(material_id) && !is_odd_checkerboard) + if (triangle.alpha < 1.0) { + // Translucent objects: split the path. + throughput *= 2; checkerboard_flags = CHECKERBOARD_FLAG_PRIMARY | CHECKERBOARD_FLAG_REFRACTION; - material_id = (material_id & ~MATERIAL_KIND_MASK) | MATERIAL_KIND_REGULAR; + + if (!is_odd_checkerboard) + { + // This field stays on the surface. + throughput *= triangle.alpha; + } + else + { + // This field goes through the surface. 
+ material_id = (material_id & ~MATERIAL_KIND_MASK) | MATERIAL_KIND_TRANSPARENT; + throughput *= 1.0 - triangle.alpha; + } } if(is_water(material_id) || is_slime(material_id)) @@ -444,7 +450,7 @@ main() // Start the transparency accumulation from the primary surface emissive component, with zero alpha vec4 transparent = vec4(primary_emissive * throughput, 0); - if(global_ubo.pt_show_sky != 0 && is_sky(ray_payload_geometry)) + if(global_ubo.pt_show_sky != 0 && is_sky(triangle.material_id)) { // show additional information about sky boxes: triangle edges... if(any(lessThan(bary, vec3(0.02)))) diff --git a/src/refresh/vkpt/shader/read_visbuf.glsl b/src/refresh/vkpt/shader/read_visbuf.glsl deleted file mode 100644 index b573262af..000000000 --- a/src/refresh/vkpt/shader/read_visbuf.glsl +++ /dev/null @@ -1,80 +0,0 @@ -/* -Copyright (C) 2018 Christoph Schied -Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. - -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along -with this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
-*/ - -#define VISBUF_INSTANCE_ID_MASK 0x000003FF -#define VISBUF_INSTANCE_PRIM_MASK 0x3FFFFC00 -#define VISBUF_INSTANCE_PRIM_SHIFT 10 -#define VISBUF_STATIC_PRIM_MASK 0x3FFFFFFF -#define VISBUF_WORLD_INSTANCE_FLAG 0x40000000 -#define VISBUF_STATIC_PRIM_FLAG 0x80000000 - -uint visbuf_pack_instance(uint instance_id, uint primitive_id, bool is_world_instance) -{ - return (instance_id & VISBUF_INSTANCE_ID_MASK) - | ((primitive_id << VISBUF_INSTANCE_PRIM_SHIFT) & VISBUF_INSTANCE_PRIM_MASK) - | (is_world_instance ? VISBUF_WORLD_INSTANCE_FLAG : 0); -} - -uint visbuf_pack_static_prim(uint primitive_id) -{ - return (primitive_id & VISBUF_STATIC_PRIM_MASK) - | VISBUF_STATIC_PRIM_FLAG; -} - -uint visbuf_get_instance_id(uint u) -{ - return u & VISBUF_INSTANCE_ID_MASK; -} - -uint visbuf_get_instance_prim(uint u) -{ - return (u & VISBUF_INSTANCE_PRIM_MASK) >> VISBUF_INSTANCE_PRIM_SHIFT; -} - -uint visbuf_get_static_prim(uint u) -{ - return u & VISBUF_STATIC_PRIM_MASK; -} - -bool visbuf_is_world_instance(uint u) -{ - return (u & VISBUF_WORLD_INSTANCE_FLAG) != 0; -} - -bool visbuf_is_static_prim(uint u) -{ - return (u & VISBUF_STATIC_PRIM_FLAG) != 0; -} - -uint visbuf_pack_barycentrics(vec3 bary) -{ - uvec2 encoded = uvec2(round(clamp(bary.yz, vec2(0), vec2(1)) * 0xFFFF)); - return encoded.x | (encoded.y << 16); -} - -vec3 visbuf_unpack_barycentrics(uint u) -{ - uvec2 encoded = uvec2(u & 0xFFFF, u >> 16); - - vec3 bary; - bary.yz = vec2(encoded) / 0xFFFF; - bary.x = clamp(1.0 - (bary.y + bary.z), 0.0, 1.0); - - return bary; -} \ No newline at end of file diff --git a/src/refresh/vkpt/shader/reflect_refract.rgen b/src/refresh/vkpt/shader/reflect_refract.rgen index b774a6080..afbb47804 100644 --- a/src/refresh/vkpt/shader/reflect_refract.rgen +++ b/src/refresh/vkpt/shader/reflect_refract.rgen @@ -287,7 +287,8 @@ main() if(spec_bounce_index == 0) { - checkerboard_flags = CHECKERBOARD_FLAG_REFLECTION; + if (checkerboard_flags == CHECKERBOARD_FLAG_PRIMARY) + checkerboard_flags = 
CHECKERBOARD_FLAG_REFLECTION; // Don't compute MVs or do surface replacement for dynamic models. // Dynamic models are usually moving or curved, which makes MVs incorrect, @@ -301,7 +302,6 @@ main() // This is a non-physical see-through material. // One ray stops at the primary surface, the other goes through it. - checkerboard_flags = CHECKERBOARD_FLAG_PRIMARY | CHECKERBOARD_FLAG_REFRACTION; correct_motion_vector = 2; } else @@ -464,7 +464,7 @@ main() int reflection_cull_mask = REFLECTION_RAY_CULL_MASK; if(global_ubo.first_person_model != 0 && include_player_model) reflection_cull_mask |= AS_FLAG_VIEWER_MODELS; - else + else if (!(primary_is_weapon && primary_is_transparent)) reflection_cull_mask |= AS_FLAG_VIEWER_WEAPON; if(push_constants.bounce_index < int(global_ubo.pt_reflect_refract - 1)) @@ -485,13 +485,18 @@ main() // Add the transparency encountered along the reflection ray + Triangle triangle; + if (found_intersection(ray_payload_geometry)) + { reflection_ray.t_max = ray_payload_geometry.hit_distance; + triangle = get_hit_triangle(ray_payload_geometry); + } vec4 effects = trace_effects_ray(reflection_ray, /* skip_procedural = */ false); transparent = alpha_blend_premultiplied(transparent, effects * vec4(throughput, 1)); - if(is_sky(ray_payload_geometry)) + if(!found_intersection(ray_payload_geometry) || is_sky(triangle.material_id)) { // Reflection ray hit the sky - store an empty surface into the G-buffer, // blend the environment under the transparency. 
@@ -528,7 +533,6 @@ main() throughput *= extinction(primary_medium, ray_payload_geometry.hit_distance); } - Triangle triangle = get_hit_triangle(ray_payload_geometry); vec3 bary = get_hit_barycentric(ray_payload_geometry); vec2 tex_coord = triangle.tex_coords * bary; vec3 new_geo_normal = normalize(triangle.normals * bary); @@ -655,7 +659,7 @@ main() primary_is_weapon = false; - if((is_chrome(material_id) || is_screen(material_id) || is_camera(material_id)) && primary_roughness >= MAX_MIRROR_ROUGHNESS) + if((is_chrome(material_id) || is_screen(material_id) || is_camera(material_id)) && primary_roughness >= MAX_MIRROR_ROUGHNESS || is_transparent(material_id)) { material_id = (material_id & ~MATERIAL_KIND_MASK) | MATERIAL_KIND_REGULAR; } @@ -701,21 +705,51 @@ main() } } + if (triangle.alpha < 1.0) + { + // Translucent objects. + if (bitCount(checkerboard_flags & CHECKERBOARD_FLAG_FIELD_MASK) == 1) + { + // Split the path if it wasn't split before. + throughput *= 2; + checkerboard_flags = CHECKERBOARD_FLAG_PRIMARY | CHECKERBOARD_FLAG_REFRACTION; + + if (!is_odd_checkerboard) + { + // This field stays on the surface. + throughput *= triangle.alpha; + } + else + { + // This field goes through the surface. + material_id = (material_id & ~MATERIAL_KIND_MASK) | MATERIAL_KIND_TRANSPARENT; + throughput *= 1.0 - triangle.alpha; + } + } + else + { + // If the path was split *on the same model* before, continue tracing. + // We don't want to show the internal geometry of a translucent model. 
+ uint primary_instance_index = imageLoad(IMG_PT_VISBUF_PRIM_A, ipos).x; + if (primary_instance_index == triangle.instance_index) + { + material_id = (material_id & ~MATERIAL_KIND_MASK) | MATERIAL_KIND_TRANSPARENT; + } + } + } + if(luminance(primary_emissive) > 0) { // Emissive component on the reflected/refracted surface - blend it with zero alpha transparent = alpha_blend_premultiplied(transparent, vec4(primary_emissive * throughput, 0)); } - bool is_dynamic_primitive = is_dynamic_instance(ray_payload_geometry); - uint primitive_id = get_primitive(ray_payload_geometry); - - uvec2 vis_buf; - vis_buf.x = is_dynamic_primitive - ? get_instance_id_instanced(primitive_id) - : visbuf_pack_static_prim(primitive_id); - vis_buf.y = visbuf_pack_barycentrics(bary); - - imageStore(IMG_PT_VISBUF_A, ipos, uvec4(vis_buf, 0, 0)); + { + uvec2 vis_buf; + vis_buf.x = triangle.instance_index; + vis_buf.y = triangle.instance_prim; + imageStore(IMG_PT_VISBUF_PRIM_A, ipos, uvec4(vis_buf, 0, 0)); + imageStore(IMG_PT_VISBUF_BARY_A, ipos, vec4(bary.yz, 0, 0)); + } // Store the surface parameters into the G-buffer for the indirect lighting shader if(is_camera(material_id)) diff --git a/src/refresh/vkpt/shader/shader_structs.h b/src/refresh/vkpt/shader/shader_structs.h new file mode 100644 index 000000000..67be66567 --- /dev/null +++ b/src/refresh/vkpt/shader/shader_structs.h @@ -0,0 +1,46 @@ +/* +Copyright (C) 2021, NVIDIA CORPORATION. All rights reserved. + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#ifndef SHADER_STRUCTS_H + +#ifdef VKPT_SHADER + +#define BEGIN_SHADER_STRUCT(NAME) struct NAME +#define END_SHADER_STRUCT(NAME) ; + +#else // VKPT_SHADER + +#define BEGIN_SHADER_STRUCT(NAME) typedef struct +#define END_SHADER_STRUCT(NAME) NAME; + +typedef uint32_t uint; +typedef float vec2[2]; +typedef float vec3[3]; +typedef float vec4[4]; +typedef uint32_t uvec2[2]; +typedef uint32_t uvec3[3]; +typedef uint32_t uvec4[4]; +typedef int ivec2[2]; +typedef int ivec3[3]; +typedef int ivec4[4]; +typedef float mat3[3][3]; +typedef float mat4[4][4]; + +#endif // VKPT_SHADER + +#endif // SHADER_STRUCTS_H \ No newline at end of file diff --git a/src/refresh/vkpt/shader/vertex_buffer.h b/src/refresh/vkpt/shader/vertex_buffer.h index bfcde2151..f407da9cb 100644 --- a/src/refresh/vkpt/shader/vertex_buffer.h +++ b/src/refresh/vkpt/shader/vertex_buffer.h @@ -1,6 +1,6 @@ /* Copyright (C) 2018 Christoph Schied -Copyright (C) 2019, NVIDIA CORPORATION. All rights reserved. +Copyright (C) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -20,11 +20,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #ifndef _VERTEX_BUFFER_H_ #define _VERTEX_BUFFER_H_ -#define MAX_VERT_BSP (1 << 21) - -#define MAX_VERT_MODEL (1 << 23) -#define MAX_IDX_MODEL (1 << 22) -#define MAX_PRIM_MODEL (MAX_IDX_MODEL / 3) +#include "shader_structs.h" #define MAX_LIGHT_LISTS (1 << 14) #define MAX_LIGHT_LIST_NODES (1 << 19) @@ -42,8 +38,8 @@ with this program; if not, write to the Free Software Foundation, Inc., #define ALIGN_SIZE_4(x, n) ((x * n + 3) & (~3)) -#define BSP_VERTEX_BUFFER_BINDING_IDX 0 -#define MODEL_DYNAMIC_VERTEX_BUFFER_BINDING_IDX 1 +#define PRIMITIVE_BUFFER_BINDING_IDX 0 +#define POSITION_BUFFER_BINDING_IDX 1 #define LIGHT_BUFFER_BINDING_IDX 2 #define IQM_MATRIX_BUFFER_BINDING_IDX 3 #define READBACK_BUFFER_BINDING_IDX 4 @@ -52,43 +48,75 @@ with this program; if not, write to the Free Software Foundation, Inc., #define SUN_COLOR_UBO_BINDING_IDX 7 #define LIGHT_STATS_BUFFER_BINDING_IDX 8 +#define VERTEX_BUFFER_WORLD 0 +#define VERTEX_BUFFER_INSTANCED 1 +#define VERTEX_BUFFER_FIRST_MODEL 2 + #define SUN_COLOR_ACCUMULATOR_FIXED_POINT_SCALE 0x100000 #define SKY_COLOR_ACCUMULATOR_FIXED_POINT_SCALE 0x100 +// A structure that is used in primitive buffers to store complete information about one triangle. +// Its size is 8x float4 or 128 bytes to align with GPU cache lines. +// Path tracing accesses the primitive information in a very incoherent way, where every thread +// is likely to read a different primitive. Packing the info into one struct should reduce the +// total traffic from video memory by reading entire cache lines instead of sparse values from +// different buffers. 
+BEGIN_SHADER_STRUCT( VboPrimitive ) +{ + vec3 pos0; + uint material_id; + + vec3 pos1; + int cluster; + + vec3 pos2; + float texel_density; + + uvec3 normals; + uint instance; + + uvec3 tangents; + uint emissive_and_alpha; + + vec2 uv0; + vec2 uv1; + vec2 uv2; + uvec2 custom0; // The custom fields store motion for instanced meshes in the animated buffer, + uvec2 custom1; // or blend indices and weights for skinned meshes before they're animated. + uvec2 custom2; +} +END_SHADER_STRUCT( VboPrimitive ) + + #ifdef VKPT_SHADER -#define uint32_t uint + +#ifdef VERTEX_READONLY +#define VERTEX_READONLY_FLAG readonly +#else +#define VERTEX_READONLY_FLAG #endif -#define BSP_VERTEX_BUFFER_LIST \ - VERTEX_BUFFER_LIST_DO(float, 3, positions_bsp, (MAX_VERT_BSP )) \ - VERTEX_BUFFER_LIST_DO(float, 2, tex_coords_bsp, (MAX_VERT_BSP )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, normals_bsp, (MAX_VERT_BSP )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, tangents_bsp, (MAX_VERT_BSP )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, materials_bsp, (MAX_VERT_BSP / 3 )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, clusters_bsp, (MAX_VERT_BSP / 3 )) \ - VERTEX_BUFFER_LIST_DO(float, 1, texel_density_bsp, (MAX_VERT_BSP / 3 )) \ - VERTEX_BUFFER_LIST_DO(float, 1, emissive_factors_bsp, (MAX_VERT_BSP / 3 )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, sky_visibility, (MAX_LIGHT_LISTS / 32)) \ - -#define MODEL_DYNAMIC_VERTEX_BUFFER_LIST \ - VERTEX_BUFFER_LIST_DO(float, 3, positions_instanced, (MAX_VERT_MODEL )) \ - VERTEX_BUFFER_LIST_DO(float, 3, pos_prev_instanced, (MAX_VERT_MODEL )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, normals_instanced, (MAX_VERT_MODEL )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, tangents_instanced, (MAX_VERT_MODEL )) \ - VERTEX_BUFFER_LIST_DO(float, 2, tex_coords_instanced, (MAX_VERT_MODEL )) \ - VERTEX_BUFFER_LIST_DO(float, 1, alpha_instanced, (MAX_PRIM_MODEL )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, clusters_instanced, (MAX_PRIM_MODEL )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, materials_instanced, 
(MAX_PRIM_MODEL )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, instance_id_instanced, (MAX_PRIM_MODEL )) \ - VERTEX_BUFFER_LIST_DO(float, 1, texel_density_instanced, (MAX_PRIM_MODEL )) \ +// The buffers with primitive data, currently two of them: world and instanced. +// They are stored in an array to allow branchless access with nonuniformEXT. +layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = PRIMITIVE_BUFFER_BINDING_IDX) VERTEX_READONLY_FLAG buffer PRIMITIVE_BUFFER { + VboPrimitive primitives[]; +} primitive_buffers[]; + +// The buffer with just the position data for animated models. +layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = POSITION_BUFFER_BINDING_IDX) VERTEX_READONLY_FLAG buffer POSITION_BUFFER { + float positions[]; +} instanced_position_buffer; + +#endif #define LIGHT_BUFFER_LIST \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, material_table, (MAX_PBR_MATERIALS * MATERIAL_UINTS)) \ + VERTEX_BUFFER_LIST_DO(uint, 1, material_table, (MAX_PBR_MATERIALS * MATERIAL_UINTS)) \ VERTEX_BUFFER_LIST_DO(float, 4, light_polys, (MAX_LIGHT_POLYS * LIGHT_POLY_VEC4S)) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, light_list_offsets, (MAX_LIGHT_LISTS )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, light_list_lights, (MAX_LIGHT_LIST_NODES)) \ + VERTEX_BUFFER_LIST_DO(uint, 1, light_list_offsets, (MAX_LIGHT_LISTS )) \ + VERTEX_BUFFER_LIST_DO(uint, 1, light_list_lights, (MAX_LIGHT_LIST_NODES)) \ VERTEX_BUFFER_LIST_DO(float, 1, light_styles, (MAX_LIGHT_STYLES )) \ - VERTEX_BUFFER_LIST_DO(uint32_t, 1, cluster_debug_mask, (MAX_LIGHT_LISTS / 32)) \ + VERTEX_BUFFER_LIST_DO(uint, 1, cluster_debug_mask, (MAX_LIGHT_LISTS / 32)) \ + VERTEX_BUFFER_LIST_DO(uint, 1, sky_visibility, (MAX_LIGHT_LISTS / 32)) \ #define IQM_MATRIX_BUFFER_LIST \ VERTEX_BUFFER_LIST_DO(float, 4, iqm_matrices, (MAX_IQM_MATRICES)) \ @@ -96,16 +124,6 @@ with this program; if not, write to the Free Software Foundation, Inc., #define VERTEX_BUFFER_LIST_DO(type, dim, name, size) \ type name[ALIGN_SIZE_4(size, dim)]; -struct 
BspVertexBuffer -{ - BSP_VERTEX_BUFFER_LIST -}; - -struct ModelDynamicVertexBuffer -{ - MODEL_DYNAMIC_VERTEX_BUFFER_LIST -}; - struct LightBuffer { LIGHT_BUFFER_LIST @@ -139,8 +157,8 @@ typedef int ivec4_t[4]; struct ReadbackBuffer { - uint32_t material; - uint32_t cluster; + uint material; + uint cluster; float sun_luminance; float sky_luminance; vec3_t hdr_color; @@ -162,42 +180,11 @@ struct SunColorBuffer }; #ifndef VKPT_SHADER -typedef struct BspVertexBuffer BspVertexBuffer; -typedef struct ModelDynamicVertexBuffer ModelDynamicVertexBuffer; typedef struct LightBuffer LightBuffer; typedef struct IqmMatrixBuffer IqmMatrixBuffer; typedef struct ReadbackBuffer ReadbackBuffer; typedef struct ToneMappingBuffer ToneMappingBuffer; typedef struct SunColorBuffer SunColorBuffer; - -typedef struct { - vec3_t position; - vec3_t normal; - vec2_t texcoord; -} model_vertex_t; - -typedef struct -{ - vec3_t position; - vec3_t normal; - vec2_t texcoord; - vec3_t tangent; - uint32_t blend_indices; - vec4_t blend_weights; -} iqm_vertex_t; -#else -#define MODEL_VERTEX_SIZE 8 -#define MODEL_VERTEX_POSITION 0 -#define MODEL_VERTEX_NORMAL 3 -#define MODEL_VERTEX_TEXCOORD 6 - -#define IQM_VERTEX_SIZE 16 -#define IQM_VERTEX_POSITION 0 -#define IQM_VERTEX_NORMAL 3 -#define IQM_VERTEX_TEXCOORD 6 -#define IQM_VERTEX_TANGENT 8 -#define IQM_VERTEX_INDICES 11 -#define IQM_VERTEX_WEIGHTS 12 #endif #ifdef VKPT_SHADER @@ -227,26 +214,6 @@ struct LightPolygon float prev_style_scale; }; -#ifdef VERTEX_READONLY -layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = BSP_VERTEX_BUFFER_BINDING_IDX) readonly buffer BSP_VERTEX_BUFFER { - BspVertexBuffer vbo_bsp; -}; -#else -layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = BSP_VERTEX_BUFFER_BINDING_IDX) buffer BSP_VERTEX_BUFFER { - BspVertexBuffer vbo_bsp; -}; -#endif - -#ifdef VERTEX_READONLY -layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = MODEL_DYNAMIC_VERTEX_BUFFER_BINDING_IDX) readonly buffer MODEL_DYNAMIC_VERTEX_BUFFER { - ModelDynamicVertexBuffer 
vbo_model_dynamic; -}; -#else -layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = MODEL_DYNAMIC_VERTEX_BUFFER_BINDING_IDX) buffer MODEL_DYNAMIC_VERTEX_BUFFER { - ModelDynamicVertexBuffer vbo_model_dynamic; -}; -#endif - layout(set = VERTEX_BUFFER_DESC_SET_IDX, binding = LIGHT_BUFFER_BINDING_IDX) readonly buffer LIGHT_BUFFER { LightBuffer lbo; }; @@ -304,21 +271,21 @@ get_##name(uint idx) \ return vec4(buf.name[idx * 4 + 0], buf.name[idx * 4 + 1], buf.name[idx * 4 + 2], buf.name[idx * 4 + 3]); \ } -#define GET_uint32_t_1(buf,name) \ +#define GET_uint_1(buf,name) \ uint \ get_##name(uint idx) \ { \ return buf.name[idx]; \ } -#define GET_uint32_t_3(buf,name) \ +#define GET_uint_3(buf,name) \ uvec3 \ get_##name(uint idx) \ { \ return uvec3(buf.name[idx * 3 + 0], buf.name[idx * 3 + 1], buf.name[idx * 3 + 2]); \ } -#define GET_uint32_t_4(buf,name) \ +#define GET_uint_4(buf,name) \ uvec4 \ get_##name(uint idx) \ { \ @@ -360,14 +327,14 @@ set_##name(uint idx, vec4 v) \ buf.name[idx * 4 + 3] = v[3]; \ } -#define SET_uint32_t_1(buf,name) \ +#define SET_uint_1(buf,name) \ void \ set_##name(uint idx, uint u) \ { \ buf.name[idx] = u; \ } -#define SET_uint32_t_3(buf,name) \ +#define SET_uint_3(buf,name) \ void \ set_##name(uint idx, uvec3 v) \ { \ @@ -377,32 +344,6 @@ set_##name(uint idx, uvec3 v) \ } #endif -#ifdef VERTEX_READONLY -#define VERTEX_BUFFER_LIST_DO(type, dim, name, size) \ - GET_##type##_##dim(vbo_bsp,name) -BSP_VERTEX_BUFFER_LIST -#undef VERTEX_BUFFER_LIST_DO -#else -#define VERTEX_BUFFER_LIST_DO(type, dim, name, size) \ - GET_##type##_##dim(vbo_bsp,name) \ - SET_##type##_##dim(vbo_bsp,name) -BSP_VERTEX_BUFFER_LIST -#undef VERTEX_BUFFER_LIST_DO -#endif - -#ifdef VERTEX_READONLY -#define VERTEX_BUFFER_LIST_DO(type, dim, name, size) \ - GET_##type##_##dim(vbo_model_dynamic,name) -MODEL_DYNAMIC_VERTEX_BUFFER_LIST -#undef VERTEX_BUFFER_LIST_DO -#else -#define VERTEX_BUFFER_LIST_DO(type, dim, name, size) \ - GET_##type##_##dim(vbo_model_dynamic,name) \ - 
SET_##type##_##dim(vbo_model_dynamic,name) -MODEL_DYNAMIC_VERTEX_BUFFER_LIST -#undef VERTEX_BUFFER_LIST_DO -#endif - #define VERTEX_BUFFER_LIST_DO(type, dim, name, size) \ GET_##type##_##dim(lbo,name) LIGHT_BUFFER_LIST @@ -421,117 +362,155 @@ struct Triangle mat3x2 tex_coords; mat3x3 tangents; uint material_id; - uint cluster; - float alpha; + int cluster; + uint instance_index; + uint instance_prim; float texel_density; float emissive_factor; + float alpha; }; Triangle -get_bsp_triangle(uint prim_id) +load_triangle(uint buffer_idx, uint prim_id) { + VboPrimitive prim = primitive_buffers[nonuniformEXT(buffer_idx)].primitives[prim_id]; + Triangle t; - t.positions[0] = get_positions_bsp(prim_id * 3 + 0); - t.positions[1] = get_positions_bsp(prim_id * 3 + 1); - t.positions[2] = get_positions_bsp(prim_id * 3 + 2); + t.positions[0] = prim.pos0; + t.positions[1] = prim.pos1; + t.positions[2] = prim.pos2; - t.positions_prev = t.positions; + t.positions_prev[0] = t.positions[0] + unpackHalf4x16(prim.custom0).xyz; + t.positions_prev[1] = t.positions[1] + unpackHalf4x16(prim.custom1).xyz; + t.positions_prev[2] = t.positions[2] + unpackHalf4x16(prim.custom2).xyz; - t.normals[0] = decode_normal(get_normals_bsp(prim_id * 3 + 0)); - t.normals[1] = decode_normal(get_normals_bsp(prim_id * 3 + 1)); - t.normals[2] = decode_normal(get_normals_bsp(prim_id * 3 + 2)); - - t.tangents[0] = decode_normal(get_tangents_bsp(prim_id * 3 + 0)); - t.tangents[1] = decode_normal(get_tangents_bsp(prim_id * 3 + 1)); - t.tangents[2] = decode_normal(get_tangents_bsp(prim_id * 3 + 2)); - - t.tex_coords[0] = get_tex_coords_bsp(prim_id * 3 + 0); - t.tex_coords[1] = get_tex_coords_bsp(prim_id * 3 + 1); - t.tex_coords[2] = get_tex_coords_bsp(prim_id * 3 + 2); + t.normals[0] = decode_normal(prim.normals.x); + t.normals[1] = decode_normal(prim.normals.y); + t.normals[2] = decode_normal(prim.normals.z); - t.material_id = get_materials_bsp(prim_id); + t.tangents[0] = decode_normal(prim.tangents.x); + 
t.tangents[1] = decode_normal(prim.tangents.y); + t.tangents[2] = decode_normal(prim.tangents.z); - t.cluster = get_clusters_bsp(prim_id); + t.tex_coords[0] = prim.uv0; + t.tex_coords[1] = prim.uv1; + t.tex_coords[2] = prim.uv2; - t.texel_density = get_texel_density_bsp(prim_id); + t.material_id = prim.material_id; + t.cluster = prim.cluster; + t.instance_index = prim.instance; + t.instance_prim = 0; + t.texel_density = prim.texel_density; - t.emissive_factor = get_emissive_factors_bsp(prim_id); - - t.alpha = 1.0; + vec2 emissive_and_alpha = unpackHalf2x16(prim.emissive_and_alpha); + t.emissive_factor = emissive_and_alpha.x; + t.alpha = emissive_and_alpha.y; return t; } Triangle -get_instanced_triangle(uint prim_id) +load_and_transform_triangle(int instance_idx, uint buffer_idx, uint prim_id) { - Triangle t; - t.positions[0] = get_positions_instanced(prim_id * 3 + 0); - t.positions[1] = get_positions_instanced(prim_id * 3 + 1); - t.positions[2] = get_positions_instanced(prim_id * 3 + 2); + Triangle t = load_triangle(buffer_idx, prim_id); - t.positions_prev[0] = get_pos_prev_instanced(prim_id * 3 + 0); - t.positions_prev[1] = get_pos_prev_instanced(prim_id * 3 + 1); - t.positions_prev[2] = get_pos_prev_instanced(prim_id * 3 + 2); - - t.normals[0] = decode_normal(get_normals_instanced(prim_id * 3 + 0)); - t.normals[1] = decode_normal(get_normals_instanced(prim_id * 3 + 1)); - t.normals[2] = decode_normal(get_normals_instanced(prim_id * 3 + 2)); - - t.tangents[0] = decode_normal(get_tangents_instanced(prim_id * 3 + 0)); - t.tangents[1] = decode_normal(get_tangents_instanced(prim_id * 3 + 1)); - t.tangents[2] = decode_normal(get_tangents_instanced(prim_id * 3 + 2)); - - t.tex_coords[0] = get_tex_coords_instanced(prim_id * 3 + 0); - t.tex_coords[1] = get_tex_coords_instanced(prim_id * 3 + 1); - t.tex_coords[2] = get_tex_coords_instanced(prim_id * 3 + 2); - - t.material_id = get_materials_instanced(prim_id); - - t.cluster = get_clusters_instanced(prim_id); - - t.alpha = 
get_alpha_instanced(prim_id); - - t.texel_density = get_texel_density_instanced(prim_id); + if (instance_idx >= 0) + { + // Instance of a static mesh: transform the vertices. + + ModelInstance mi = instance_buffer.model_instances[instance_idx]; + + t.positions[0] = vec3(mi.transform * vec4(t.positions[0], 1.0)); + t.positions[1] = vec3(mi.transform * vec4(t.positions[1], 1.0)); + t.positions[2] = vec3(mi.transform * vec4(t.positions[2], 1.0)); + + t.positions_prev[0] = vec3(mi.transform_prev * vec4(t.positions_prev[0], 1.0)); + t.positions_prev[1] = vec3(mi.transform_prev * vec4(t.positions_prev[1], 1.0)); + t.positions_prev[2] = vec3(mi.transform_prev * vec4(t.positions_prev[2], 1.0)); + + t.normals[0] = normalize(vec3(mi.transform * vec4(t.normals[0], 0.0))); + t.normals[1] = normalize(vec3(mi.transform * vec4(t.normals[1], 0.0))); + t.normals[2] = normalize(vec3(mi.transform * vec4(t.normals[2], 0.0))); + + t.tangents[0] = normalize(vec3(mi.transform * vec4(t.tangents[0], 0.0))); + t.tangents[1] = normalize(vec3(mi.transform * vec4(t.tangents[1], 0.0))); + t.tangents[2] = normalize(vec3(mi.transform * vec4(t.tangents[2], 0.0))); + + if (mi.material != 0) + t.material_id = mi.material; + t.cluster = mi.cluster; + t.emissive_factor = 1.0; + t.alpha = mi.alpha; + + // Store the index of that instance and the prim offset relative to the instance. + t.instance_index = uint(instance_idx); + t.instance_prim = prim_id - mi.render_prim_offset; + } + else if (buffer_idx == VERTEX_BUFFER_INSTANCED) + { + // Instance of an animated or skinned mesh, coming from the primbuf. + // In this case, `instance_idx` is -1 because it's not a static mesh, + // so load the original animated instance to find out its prim offset. - t.emissive_factor = 1.f; + ModelInstance mi = instance_buffer.model_instances[t.instance_index]; + t.instance_prim = prim_id - mi.render_prim_offset; + } + else if (buffer_idx == VERTEX_BUFFER_WORLD) + { + // Static BSP primitive. 
+ + t.instance_index = ~0u; + t.instance_prim = prim_id; + } return t; } #ifndef VERTEX_READONLY void -store_instanced_triangle(Triangle t, uint instance_id, uint prim_id) +store_triangle(Triangle t, uint buffer_idx, uint prim_id) { - set_positions_instanced(prim_id * 3 + 0, t.positions[0]); - set_positions_instanced(prim_id * 3 + 1, t.positions[1]); - set_positions_instanced(prim_id * 3 + 2, t.positions[2]); - - set_pos_prev_instanced(prim_id * 3 + 0, t.positions_prev[0]); - set_pos_prev_instanced(prim_id * 3 + 1, t.positions_prev[1]); - set_pos_prev_instanced(prim_id * 3 + 2, t.positions_prev[2]); - - set_normals_instanced(prim_id * 3 + 0, encode_normal(t.normals[0])); - set_normals_instanced(prim_id * 3 + 1, encode_normal(t.normals[1])); - set_normals_instanced(prim_id * 3 + 2, encode_normal(t.normals[2])); + VboPrimitive prim; - set_tangents_instanced(prim_id * 3 + 0, encode_normal(t.tangents[0])); - set_tangents_instanced(prim_id * 3 + 1, encode_normal(t.tangents[1])); - set_tangents_instanced(prim_id * 3 + 2, encode_normal(t.tangents[2])); + prim.pos0 = t.positions[0]; + prim.pos1 = t.positions[1]; + prim.pos2 = t.positions[2]; - set_tex_coords_instanced(prim_id * 3 + 0, t.tex_coords[0]); - set_tex_coords_instanced(prim_id * 3 + 1, t.tex_coords[1]); - set_tex_coords_instanced(prim_id * 3 + 2, t.tex_coords[2]); + prim.custom0 = packHalf4x16(vec4(t.positions_prev[0] - t.positions[0], 0)); + prim.custom1 = packHalf4x16(vec4(t.positions_prev[1] - t.positions[1], 0)); + prim.custom2 = packHalf4x16(vec4(t.positions_prev[2] - t.positions[2], 0)); - set_materials_instanced(prim_id, t.material_id); + prim.normals.x = encode_normal(t.normals[0]); + prim.normals.y = encode_normal(t.normals[1]); + prim.normals.z = encode_normal(t.normals[2]); - set_instance_id_instanced(prim_id, instance_id); + prim.tangents.x = encode_normal(t.tangents[0]); + prim.tangents.y = encode_normal(t.tangents[1]); + prim.tangents.z = encode_normal(t.tangents[2]); - 
set_clusters_instanced(prim_id, t.cluster); + prim.uv0 = t.tex_coords[0]; + prim.uv1 = t.tex_coords[1]; + prim.uv2 = t.tex_coords[2]; - set_alpha_instanced(prim_id, t.alpha); + prim.material_id = t.material_id; + prim.cluster = t.cluster; + prim.instance = t.instance_index; + prim.texel_density = t.texel_density; + prim.emissive_and_alpha = packHalf2x16(vec2(t.emissive_factor, t.alpha)); + + primitive_buffers[nonuniformEXT(buffer_idx)].primitives[prim_id] = prim; - set_texel_density_instanced(prim_id, t.texel_density); + if (buffer_idx == VERTEX_BUFFER_INSTANCED) + { + for (int vert = 0; vert < 3; vert++) + { + for (int axis = 0; axis < 3; axis++) + { + instanced_position_buffer.positions[prim_id * 9 + vert * 3 + axis] + = t.positions[vert][axis]; + } + } + } } #endif diff --git a/src/refresh/vkpt/shadow_map.c b/src/refresh/vkpt/shadow_map.c index d6eccd08b..e2d5b1bbe 100644 --- a/src/refresh/vkpt/shadow_map.c +++ b/src/refresh/vkpt/shadow_map.c @@ -17,7 +17,6 @@ with this program; if not, write to the Free Software Foundation, Inc., */ #include "vkpt.h" -#include "shader/vertex_buffer.h" #include @@ -32,6 +31,38 @@ static VkImageView imv_smap_depth2; static VkImageView imv_smap_depth_array; static VkDeviceMemory mem_smap; + +typedef struct +{ + mat4_t model_matrix; + VkBuffer buffer; + size_t vertex_offset; + uint32_t prim_count; +} shadowmap_instance_t; + +static uint32_t num_shadowmap_instances; +static shadowmap_instance_t shadowmap_instances[MAX_MODEL_INSTANCES]; + +void vkpt_shadow_map_reset_instances() +{ + num_shadowmap_instances = 0; +} + +void vkpt_shadow_map_add_instance(const float* model_matrix, VkBuffer buffer, size_t vertex_offset, uint32_t prim_count) +{ + if (num_shadowmap_instances < MAX_MODEL_INSTANCES) + { + shadowmap_instance_t* instance = shadowmap_instances + num_shadowmap_instances; + + memcpy(instance->model_matrix, model_matrix, sizeof(mat4_t)); + instance->buffer = buffer; + instance->vertex_offset = vertex_offset; + instance->prim_count 
= prim_count; + + ++num_shadowmap_instances; + } +} + static void create_render_pass() { @@ -361,7 +392,10 @@ vkpt_shadow_map_destroy_pipelines() } VkResult -vkpt_shadow_map_render(VkCommandBuffer cmd_buf, float* view_projection_matrix, int num_static_verts, int num_dynamic_verts, int transparent_offset, int num_transparent_verts) +vkpt_shadow_map_render(VkCommandBuffer cmd_buf, float* view_projection_matrix, + uint32_t static_offset, uint32_t num_static_verts, + uint32_t dynamic_offset, uint32_t num_dynamic_verts, + uint32_t transparent_offset, uint32_t num_transparent_verts) { IMAGE_BARRIER(cmd_buf, .image = img_smap, @@ -407,17 +441,32 @@ vkpt_shadow_map_render(VkCommandBuffer cmd_buf, float* view_projection_matrix, i vkCmdSetScissor(cmd_buf, 0, 1, &scissor); - vkCmdPushConstants(cmd_buf, pipeline_layout_smap, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(float) * 16, view_projection_matrix); + vkCmdPushConstants(cmd_buf, pipeline_layout_smap, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(mat4_t), view_projection_matrix); + + VkDeviceSize vertex_offset = vkpt_refdef.bsp_mesh_world.vertex_data_offset; + vkCmdBindVertexBuffers(cmd_buf, 0, 1, &qvk.buf_world.buffer, &vertex_offset); - VkDeviceSize vertex_offset = offsetof(struct BspVertexBuffer, positions_bsp); - vkCmdBindVertexBuffers(cmd_buf, 0, 1, &qvk.buf_vertex_bsp.buffer, &vertex_offset); + vkCmdDraw(cmd_buf, num_static_verts, 1, static_offset, 0); - vkCmdDraw(cmd_buf, num_static_verts, 1, 0, 0); + vertex_offset = 0; + vkCmdBindVertexBuffers(cmd_buf, 0, 1, &qvk.buf_positions_instanced.buffer, &vertex_offset); - vertex_offset = offsetof(struct ModelDynamicVertexBuffer, positions_instanced); - vkCmdBindVertexBuffers(cmd_buf, 0, 1, &qvk.buf_vertex_model_dynamic.buffer, &vertex_offset); + vkCmdDraw(cmd_buf, num_dynamic_verts, 1, dynamic_offset, 0); + + mat4_t mvp_matrix; + + for (uint32_t instance_idx = 0; instance_idx < num_shadowmap_instances; instance_idx++) + { + const shadowmap_instance_t* mi = shadowmap_instances + 
instance_idx; - vkCmdDraw(cmd_buf, num_dynamic_verts, 1, 0, 0); + mult_matrix_matrix(mvp_matrix, view_projection_matrix, mi->model_matrix); + + vkCmdPushConstants(cmd_buf, pipeline_layout_smap, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(mat4_t), mvp_matrix); + + vkCmdBindVertexBuffers(cmd_buf, 0, 1, &mi->buffer, &mi->vertex_offset); + + vkCmdDraw(cmd_buf, mi->prim_count * 3, 1, 0, 0); + } vkCmdEndRenderPass(cmd_buf); @@ -425,8 +474,10 @@ vkpt_shadow_map_render(VkCommandBuffer cmd_buf, float* view_projection_matrix, i render_pass_info.framebuffer = framebuffer_smap2; vkCmdBeginRenderPass(cmd_buf, &render_pass_info, VK_SUBPASS_CONTENTS_INLINE); - vertex_offset = offsetof(struct BspVertexBuffer, positions_bsp); - vkCmdBindVertexBuffers(cmd_buf, 0, 1, &qvk.buf_vertex_bsp.buffer, &vertex_offset); + vkCmdPushConstants(cmd_buf, pipeline_layout_smap, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(mat4_t), view_projection_matrix); + + vertex_offset = vkpt_refdef.bsp_mesh_world.vertex_data_offset; + vkCmdBindVertexBuffers(cmd_buf, 0, 1, &qvk.buf_world.buffer, &vertex_offset); vkCmdDraw(cmd_buf, num_transparent_verts, 1, transparent_offset, 0); diff --git a/src/refresh/vkpt/textures.c b/src/refresh/vkpt/textures.c index 16779a977..916839e72 100644 --- a/src/refresh/vkpt/textures.c +++ b/src/refresh/vkpt/textures.c @@ -157,19 +157,30 @@ void vkpt_textures_destroy_unused() textures_destroy_unused_set((qvk.frame_counter) % DESTROY_LATENCY); } -VkResult -vkpt_textures_upload_envmap(int w, int h, byte *data) +static void +destroy_envmap() { - vkDeviceWaitIdle(qvk.device); - if(imv_envmap != VK_NULL_HANDLE) { + if (imv_envmap != VK_NULL_HANDLE) { vkDestroyImageView(qvk.device, imv_envmap, NULL); imv_envmap = NULL; } - if(img_envmap != VK_NULL_HANDLE) { + if (img_envmap != VK_NULL_HANDLE) { vkDestroyImage(qvk.device, img_envmap, NULL); img_envmap = NULL; } + if (mem_envmap != VK_NULL_HANDLE) { + vkFreeMemory(qvk.device, mem_envmap, NULL); + mem_envmap = VK_NULL_HANDLE; + } +} + +VkResult 
+vkpt_textures_upload_envmap(int w, int h, byte *data) +{ + vkDeviceWaitIdle(qvk.device); + destroy_envmap(); + const int num_images = 6; size_t img_size = w * h * 4; @@ -1521,19 +1532,7 @@ vkpt_textures_destroy() vkDestroySampler (qvk.device, qvk.tex_sampler_nearest_mipmap_aniso, NULL); vkDestroySampler (qvk.device, qvk.tex_sampler_linear_clamp, NULL); - if(imv_envmap != VK_NULL_HANDLE) { - vkDestroyImageView(qvk.device, imv_envmap, NULL); - imv_envmap = NULL; - } - if(img_envmap != VK_NULL_HANDLE) { - vkDestroyImage(qvk.device, img_envmap, NULL); - img_envmap = NULL; - } - if (mem_envmap != VK_NULL_HANDLE) { - vkFreeMemory(qvk.device, mem_envmap, NULL); - mem_envmap = VK_NULL_HANDLE; - } - + destroy_envmap(); destroy_invalid_texture(); destroy_device_memory_allocator(tex_device_memory_allocator); tex_device_memory_allocator = NULL; diff --git a/src/refresh/vkpt/uniform_buffer.c b/src/refresh/vkpt/uniform_buffer.c index fda3a6eb7..9846865c2 100644 --- a/src/refresh/vkpt/uniform_buffer.c +++ b/src/refresh/vkpt/uniform_buffer.c @@ -56,7 +56,7 @@ vkpt_uniform_buffer_create() vkGetPhysicalDeviceProperties(qvk.physical_device, &properties); ubo_alignment = properties.limits.minUniformBufferOffsetAlignment; - const size_t buffer_size = align(sizeof(QVKUniformBuffer_t), ubo_alignment) + sizeof(QVKInstanceBuffer_t); + const size_t buffer_size = align(sizeof(QVKUniformBuffer_t), ubo_alignment) + sizeof(InstanceBuffer); for(int i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) buffer_create(host_uniform_buffers + i, buffer_size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, host_memory_flags); @@ -95,7 +95,7 @@ vkpt_uniform_buffer_create() VkDescriptorBufferInfo buf1_info = { .buffer = device_uniform_buffer.buffer, .offset = align(sizeof(QVKUniformBuffer_t), ubo_alignment), - .range = sizeof(QVKInstanceBuffer_t), + .range = sizeof(InstanceBuffer), }; VkWriteDescriptorSet writes[2] = { 0 }; @@ -137,26 +137,36 @@ vkpt_uniform_buffer_destroy() } VkResult -vkpt_uniform_buffer_update(VkCommandBuffer 
command_buffer) +vkpt_uniform_buffer_upload_to_staging() { - BufferResource_t *ubo = host_uniform_buffers + qvk.current_frame_index; + BufferResource_t* ubo = host_uniform_buffers + qvk.current_frame_index; assert(ubo); assert(ubo->memory != VK_NULL_HANDLE); assert(ubo->buffer != VK_NULL_HANDLE); assert(qvk.current_frame_index < MAX_FRAMES_IN_FLIGHT); - QVKUniformBuffer_t *mapped_ubo = buffer_map(ubo); + QVKUniformBuffer_t* mapped_ubo = buffer_map(ubo); assert(mapped_ubo); + if (!mapped_ubo) + return VK_ERROR_MEMORY_MAP_FAILED; + memcpy(mapped_ubo, &vkpt_refdef.uniform_buffer, sizeof(QVKUniformBuffer_t)); const size_t offset = align(sizeof(QVKUniformBuffer_t), ubo_alignment); - memcpy((uint8_t*)mapped_ubo + offset, &vkpt_refdef.uniform_instance_buffer, sizeof(QVKInstanceBuffer_t)); + memcpy((uint8_t*)mapped_ubo + offset, &vkpt_refdef.uniform_instance_buffer, sizeof(InstanceBuffer)); buffer_unmap(ubo); - mapped_ubo = NULL; + + return VK_SUCCESS; +} + +void +vkpt_uniform_buffer_copy_from_staging(VkCommandBuffer command_buffer) +{ + BufferResource_t* ubo = host_uniform_buffers + qvk.current_frame_index; VkBufferCopy copy = { 0 }; - copy.size = align(sizeof(QVKUniformBuffer_t), ubo_alignment) + sizeof(QVKInstanceBuffer_t); + copy.size = align(sizeof(QVKUniformBuffer_t), ubo_alignment) + sizeof(InstanceBuffer); vkCmdCopyBuffer(command_buffer, ubo->buffer, device_uniform_buffer.buffer, 1, &copy); VkBufferMemoryBarrier barrier = { @@ -171,8 +181,6 @@ vkpt_uniform_buffer_update(VkCommandBuffer command_buffer) vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1, &barrier, 0, NULL); - - return VK_SUCCESS; } diff --git a/src/refresh/vkpt/vertex_buffer.c b/src/refresh/vkpt/vertex_buffer.c index f9912ecc1..15545dce1 100644 --- a/src/refresh/vkpt/vertex_buffer.c +++ b/src/refresh/vkpt/vertex_buffer.c @@ -23,9 +23,7 @@ with this program; if not, write to the Free Software Foundation, Inc., #include "material.h" 
#include -#include #include "conversion.h" -#include "precomputed_sky.h" static VkDescriptorPool desc_pool_vertex_buffer; @@ -33,36 +31,348 @@ static VkPipeline pipeline_instance_geometry; static VkPipeline pipeline_animate_materials; static VkPipelineLayout pipeline_layout_instance_geometry; -typedef struct { - BufferResource_t buffer; - BufferResource_t staging_buffer; - int registration_sequence; -} model_vbo_t; - model_vbo_t model_vertex_data[MAX_MODELS]; static BufferResource_t null_buffer; +// Cvar that controls the initial animated primitive buffer size at startup. +// The buffer can grow later if necessary, but that causes stutter. +static cvar_t* cvar_pt_primbuf = NULL; +static uint32_t current_primbuf_size = 0; + +// Clamps and default setting for the animated primitive buffer size +#define PRIMBUF_SIZE_MIN (1 << 16) +#define PRIMBUF_SIZE_MAX (1 << 26) +#define PRIMBUF_SIZE_DEFAULT (1 << 20) + +// Per Vulkan spec, acceleration structure offset must be a multiple of 256 +// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VkAccelerationStructureCreateInfoKHR.html +#define ACCEL_STRUCT_ALIGNMENT 256 + +void vkpt_init_model_geometry(model_geometry_t* info, uint32_t max_geometries) +{ + assert(info->geometry_storage == NULL); // avoid double allocation + + if (max_geometries == 0) + return; + + size_t size_geometries = max_geometries * sizeof(VkAccelerationStructureGeometryKHR); + size_t size_build_ranges = max_geometries * sizeof(VkAccelerationStructureBuildRangeInfoKHR); + size_t size_prims = max_geometries * sizeof(uint32_t); + + info->geometry_storage = Z_Mallocz(size_geometries + size_build_ranges + size_prims * 2); + + info->geometries = (VkAccelerationStructureGeometryKHR*)info->geometry_storage; + info->build_ranges = (VkAccelerationStructureBuildRangeInfoKHR*)(info->geometry_storage + size_geometries); + info->prim_counts = (uint32_t*)(info->geometry_storage + size_geometries + size_build_ranges); + info->prim_offsets = 
(uint32_t*)(info->geometry_storage + size_geometries + size_build_ranges + size_prims); + + info->max_geometries = max_geometries; +} + +void vkpt_destroy_model_geometry(model_geometry_t* info) +{ + if (!info->geometry_storage) + return; + + Z_Free(info->geometry_storage); + info->geometry_storage = NULL; + info->geometries = NULL; + info->build_ranges = NULL; + info->prim_counts = NULL; + info->prim_offsets = NULL; + + if (info->accel) + { + qvkDestroyAccelerationStructureKHR(qvk.device, info->accel, NULL); + info->accel = NULL; + } +} + +void vkpt_append_model_geometry(model_geometry_t* info, uint32_t num_prims, uint32_t prim_offset, const char* model_name) +{ + if (num_prims == 0) + return; + + if (info->num_geometries >= info->max_geometries) + { + Com_WPrintf("Model '%s' exceeds the maximum supported number of meshes (%d)\n", model_name, info->max_geometries); + return; + } + + VkAccelerationStructureGeometryKHR geometry = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR, + .geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR, + .geometry = { + .triangles = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR, + .vertexFormat = VK_FORMAT_R32G32B32_SFLOAT, + .vertexStride = sizeof(vec3), + .maxVertex = num_prims * 3, + .indexType = VK_INDEX_TYPE_NONE_KHR + } + } + }; + + VkAccelerationStructureBuildRangeInfoKHR build_range = { + .primitiveCount = num_prims + }; + + uint32_t geom_index = info->num_geometries; + + info->geometries[geom_index] = geometry; + info->build_ranges[geom_index] = build_range; + info->prim_counts[geom_index] = num_prims; + info->prim_offsets[geom_index] = prim_offset; + + ++info->num_geometries; +} + +static void suballocate_model_blas_memory(model_geometry_t* info, size_t* vbo_size, const char* model_name) +{ + VkAccelerationStructureBuildSizesInfoKHR build_sizes = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR, + }; + + info->build_sizes = build_sizes; + + if 
(info->num_geometries == 0) + return; + + VkAccelerationStructureBuildGeometryInfoKHR blasBuildinfo = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR, + .mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .geometryCount = info->num_geometries, + .pGeometries = info->geometries + }; + + qvkGetAccelerationStructureBuildSizesKHR(qvk.device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, + &blasBuildinfo, info->prim_counts, &info->build_sizes); + + if (info->build_sizes.buildScratchSize > buf_accel_scratch.size) + { + Com_WPrintf("Model '%s' requires %lu bytes scratch buffer to build its BLAS, while only %zu are available.\n", + model_name, info->build_sizes.buildScratchSize, buf_accel_scratch.size); + + info->num_geometries = 0; + } + else + { + *vbo_size = align(*vbo_size, ACCEL_STRUCT_ALIGNMENT); + + info->blas_data_offset = *vbo_size; + *vbo_size += info->build_sizes.accelerationStructureSize; + } +} + +static void create_model_blas(model_geometry_t* info, VkBuffer buffer, const char* name) +{ + if (info->num_geometries == 0) + return; + + VkAccelerationStructureCreateInfoKHR blasCreateInfo = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + .buffer = buffer, + .offset = info->blas_data_offset, + .size = info->build_sizes.accelerationStructureSize, + }; + + _VK(qvkCreateAccelerationStructureKHR(qvk.device, &blasCreateInfo, NULL, &info->accel)); + + VkAccelerationStructureDeviceAddressInfoKHR as_device_address_info = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_DEVICE_ADDRESS_INFO_KHR, + .accelerationStructure = info->accel + }; + + info->blas_device_address = qvkGetAccelerationStructureDeviceAddressKHR(qvk.device, &as_device_address_info); + + if (name) + ATTACH_LABEL_VARIABLE_NAME(info->accel, 
ACCELERATION_STRUCTURE_KHR, name); +} + +static void build_model_blas(VkCommandBuffer cmd_buf, model_geometry_t* info, size_t first_vertex_offset, const BufferResource_t* buffer) +{ + if (!info->accel) + return; + + assert(buffer->address); + + uint32_t total_prims = 0; + + for (uint32_t index = 0; index < info->num_geometries; index++) + { + VkAccelerationStructureGeometryKHR* geometry = info->geometries + index; + + geometry->geometry.triangles.vertexData.deviceAddress = buffer->address + + info->prim_offsets[index] * sizeof(prim_positions_t) + first_vertex_offset; + + total_prims += info->prim_counts[index]; + } + + VkAccelerationStructureBuildGeometryInfoKHR blasBuildinfo = { + .sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR, + .type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, + .flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR, + .mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR, + .geometryCount = info->num_geometries, + .pGeometries = info->geometries, + .dstAccelerationStructure = info->accel, + .scratchData = { + .deviceAddress = buf_accel_scratch.address + } + }; + + const VkAccelerationStructureBuildRangeInfoKHR* pBlasBuildRange = info->build_ranges; + + qvkCmdBuildAccelerationStructuresKHR(cmd_buf, 1, &blasBuildinfo, &pBlasBuildRange); + + VkMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR + | VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, + .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR + }; + + vkCmdPipelineBarrier(cmd_buf, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR, + VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR, 0, 1, + &barrier, 0, 0, 0, 0); +} + VkResult -vkpt_vertex_buffer_bsp_upload_staging() +vkpt_vertex_buffer_upload_bsp_mesh(bsp_mesh_t* bsp_mesh) { - vkWaitForFences(qvk.device, 1, &qvk.fence_vertex_sync, VK_TRUE, ~((uint64_t)0)); - vkResetFences(qvk.device, 1, 
&qvk.fence_vertex_sync); + assert(bsp_mesh); + + vkDeviceWaitIdle(qvk.device); + + // Destroy the world buffer from the previous map. + buffer_destroy(&qvk.buf_world); + size_t vbo_size = bsp_mesh->num_primitives * sizeof(VboPrimitive); + bsp_mesh->vertex_data_offset = vbo_size; + vbo_size += bsp_mesh->num_primitives * sizeof(prim_positions_t); + size_t staging_size = vbo_size; + + suballocate_model_blas_memory(&bsp_mesh->geom_opaque, &vbo_size, "bsp:opaque"); + suballocate_model_blas_memory(&bsp_mesh->geom_transparent, &vbo_size, "bsp:transparent"); + suballocate_model_blas_memory(&bsp_mesh->geom_masked, &vbo_size, "bsp:masked"); + suballocate_model_blas_memory(&bsp_mesh->geom_sky, &vbo_size, "bsp:sky"); + suballocate_model_blas_memory(&bsp_mesh->geom_custom_sky, &vbo_size, "bsp:custom_sky"); + + char name[MAX_QPATH]; + + for (int i = 0; i < bsp_mesh->num_models; i++) + { + bsp_model_t* model = bsp_mesh->models + i; + + Q_snprintf(name, sizeof(name), "bsp:models[%d]", i); + + suballocate_model_blas_memory(&model->geometry, &vbo_size, name); + } + + VkResult res = buffer_create(&qvk.buf_world, vbo_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + if (res != VK_SUCCESS) return res; + + ATTACH_LABEL_VARIABLE(qvk.buf_world.buffer, BUFFER); + ATTACH_LABEL_VARIABLE(qvk.buf_world.memory, DEVICE_MEMORY); + + BufferResource_t staging_buffer; + + res = buffer_create(&staging_buffer, staging_size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + + if (res != VK_SUCCESS) return res; + + create_model_blas(&bsp_mesh->geom_opaque, qvk.buf_world.buffer, "bsp:opaque"); + create_model_blas(&bsp_mesh->geom_transparent, 
qvk.buf_world.buffer, "bsp:transparent"); + create_model_blas(&bsp_mesh->geom_masked, qvk.buf_world.buffer, "bsp:masked"); + create_model_blas(&bsp_mesh->geom_sky, qvk.buf_world.buffer, "bsp:sky"); + create_model_blas(&bsp_mesh->geom_custom_sky, qvk.buf_world.buffer, "bsp:custom_sky"); + + for (int i = 0; i < bsp_mesh->num_models; i++) + { + bsp_model_t* model = bsp_mesh->models + i; + + Q_snprintf(name, sizeof(name), "bsp:models[%d]", i); + + create_model_blas(&model->geometry, qvk.buf_world.buffer, name); + } + + uint8_t* staging_data = buffer_map(&staging_buffer); + memcpy(staging_data, bsp_mesh->primitives, bsp_mesh->num_primitives * sizeof(VboPrimitive)); + + prim_positions_t* positions = (prim_positions_t*)(staging_data + bsp_mesh->vertex_data_offset); // NOLINT(clang-diagnostic-cast-align) + for (uint32_t prim = 0; prim < bsp_mesh->num_primitives; ++prim) + { + VectorCopy(bsp_mesh->primitives[prim].pos0, positions[prim][0]); + VectorCopy(bsp_mesh->primitives[prim].pos1, positions[prim][1]); + VectorCopy(bsp_mesh->primitives[prim].pos2, positions[prim][2]); + } + + buffer_unmap(&staging_buffer); VkCommandBuffer cmd_buf = vkpt_begin_command_buffer(&qvk.cmd_buffers_graphics); VkBufferCopy copyRegion = { - .size = sizeof(BspVertexBuffer), + .size = staging_buffer.size, }; - vkCmdCopyBuffer(cmd_buf, qvk.buf_vertex_bsp_staging.buffer, qvk.buf_vertex_bsp.buffer, 1, &copyRegion); - + vkCmdCopyBuffer(cmd_buf, staging_buffer.buffer, qvk.buf_world.buffer, 1, &copyRegion); + BUFFER_BARRIER(cmd_buf, .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, - .buffer = qvk.buf_vertex_bsp.buffer, + .buffer = qvk.buf_world.buffer, .offset = 0, .size = VK_WHOLE_SIZE, ); + build_model_blas(cmd_buf, &bsp_mesh->geom_opaque, bsp_mesh->vertex_data_offset, &qvk.buf_world); + build_model_blas(cmd_buf, &bsp_mesh->geom_transparent, bsp_mesh->vertex_data_offset, &qvk.buf_world); + build_model_blas(cmd_buf, &bsp_mesh->geom_masked, 
bsp_mesh->vertex_data_offset, &qvk.buf_world); + build_model_blas(cmd_buf, &bsp_mesh->geom_sky, bsp_mesh->vertex_data_offset, &qvk.buf_world); + build_model_blas(cmd_buf, &bsp_mesh->geom_custom_sky, bsp_mesh->vertex_data_offset, &qvk.buf_world); + + bsp_mesh->geom_opaque.instance_mask = AS_FLAG_OPAQUE; + bsp_mesh->geom_opaque.instance_flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR; + bsp_mesh->geom_opaque.sbt_offset = SBTO_OPAQUE; + + bsp_mesh->geom_transparent.instance_mask = AS_FLAG_TRANSPARENT; + bsp_mesh->geom_transparent.instance_flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR; + bsp_mesh->geom_transparent.sbt_offset = SBTO_OPAQUE; + + bsp_mesh->geom_masked.instance_mask = AS_FLAG_OPAQUE; + bsp_mesh->geom_masked.instance_flags = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + bsp_mesh->geom_masked.sbt_offset = SBTO_MASKED; + + bsp_mesh->geom_sky.instance_mask = AS_FLAG_SKY; + bsp_mesh->geom_sky.instance_flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR; + bsp_mesh->geom_sky.sbt_offset = SBTO_OPAQUE; + + bsp_mesh->geom_custom_sky.instance_mask = AS_FLAG_CUSTOM_SKY; + bsp_mesh->geom_custom_sky.instance_flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR; + bsp_mesh->geom_custom_sky.sbt_offset = SBTO_OPAQUE; + + for (int i = 0; i < bsp_mesh->num_models; i++) + { + bsp_model_t* model = bsp_mesh->models + i; + build_model_blas(cmd_buf, &model->geometry, bsp_mesh->vertex_data_offset, &qvk.buf_world); + + model->geometry.instance_mask = model->transparent ? bsp_mesh->geom_transparent.instance_mask : bsp_mesh->geom_opaque.instance_mask; + model->geometry.instance_flags = model->masked ? bsp_mesh->geom_masked.instance_flags : model->transparent ? bsp_mesh->geom_transparent.instance_flags : bsp_mesh->geom_opaque.instance_flags; + model->geometry.sbt_offset = model->masked ? 
bsp_mesh->geom_masked.sbt_offset : bsp_mesh->geom_opaque.sbt_offset; + } + if (qvk.buf_light_stats[0].buffer) { vkCmdFillBuffer(cmd_buf, qvk.buf_light_stats[0].buffer, 0, qvk.buf_light_stats[0].size, 0); @@ -70,11 +380,51 @@ vkpt_vertex_buffer_bsp_upload_staging() vkCmdFillBuffer(cmd_buf, qvk.buf_light_stats[2].buffer, 0, qvk.buf_light_stats[2].size, 0); } - vkpt_submit_command_buffer(cmd_buf, qvk.queue_graphics, (1 << qvk.device_count) - 1, 0, NULL, NULL, NULL, 0, NULL, NULL, qvk.fence_vertex_sync); + vkpt_submit_command_buffer(cmd_buf, qvk.queue_graphics, (1 << qvk.device_count) - 1, 0, NULL, NULL, NULL, 0, NULL, NULL, NULL); + + vkDeviceWaitIdle(qvk.device); + + buffer_destroy(&staging_buffer); + + + VkDescriptorBufferInfo buf_info = { + .buffer = qvk.buf_world.buffer, + .offset = 0, + .range = bsp_mesh->num_primitives * sizeof(VboPrimitive), + }; + + VkWriteDescriptorSet output_buf_write = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = qvk.desc_set_vertex_buffer, + .dstBinding = PRIMITIVE_BUFFER_BINDING_IDX, + .dstArrayElement = VERTEX_BUFFER_WORLD, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .pBufferInfo = &buf_info, + }; + + vkUpdateDescriptorSets(qvk.device, 1, &output_buf_write, 0, NULL); + return VK_SUCCESS; } +void vkpt_vertex_buffer_cleanup_bsp_mesh(bsp_mesh_t* bsp_mesh) +{ + vkpt_destroy_model_geometry(&bsp_mesh->geom_opaque); + vkpt_destroy_model_geometry(&bsp_mesh->geom_transparent); + vkpt_destroy_model_geometry(&bsp_mesh->geom_masked); + vkpt_destroy_model_geometry(&bsp_mesh->geom_sky); + vkpt_destroy_model_geometry(&bsp_mesh->geom_custom_sky); + + for (int i = 0; i < bsp_mesh->num_models; i++) + { + bsp_model_t* model = bsp_mesh->models + i; + + vkpt_destroy_model_geometry(&model->geometry); + } +} + VkResult vkpt_light_buffer_upload_staging(VkCommandBuffer cmd_buf) { @@ -111,44 +461,6 @@ vkpt_iqm_matrix_buffer_upload_staging(VkCommandBuffer cmd_buf) return VK_SUCCESS; } -VkResult 
-vkpt_vertex_buffer_upload_bsp_mesh_to_staging(bsp_mesh_t *bsp_mesh) -{ - assert(bsp_mesh); - BspVertexBuffer *vbo = (BspVertexBuffer *) buffer_map(&qvk.buf_vertex_bsp_staging); - assert(vbo); - - int num_vertices = bsp_mesh->num_vertices; - if (num_vertices > MAX_VERT_BSP) - { - assert(!"Vertex buffer overflow"); - num_vertices = MAX_VERT_BSP; - } - - memcpy(vbo->positions_bsp, bsp_mesh->positions, num_vertices * sizeof(float) * 3 ); - memcpy(vbo->tex_coords_bsp, bsp_mesh->tex_coords,num_vertices * sizeof(float) * 2 ); - memcpy(vbo->normals_bsp, bsp_mesh->normals, num_vertices * sizeof(uint32_t)); - memcpy(vbo->tangents_bsp, bsp_mesh->tangents, num_vertices * sizeof(uint32_t)); - memcpy(vbo->materials_bsp, bsp_mesh->materials, num_vertices * sizeof(uint32_t) / 3); - memcpy(vbo->emissive_factors_bsp, bsp_mesh->emissive_factors, num_vertices * sizeof(uint32_t) / 3); - memcpy(vbo->clusters_bsp, bsp_mesh->clusters, num_vertices * sizeof(uint32_t) / 3); - memcpy(vbo->texel_density_bsp, bsp_mesh->texel_density, num_vertices * sizeof(float) / 3); - - int num_clusters = bsp_mesh->num_clusters; - if (num_clusters > MAX_LIGHT_LISTS) - { - assert(!"Visibility buffer overflow"); - num_clusters = MAX_LIGHT_LISTS; - } - - memcpy(vbo->sky_visibility, bsp_mesh->sky_visibility, (num_clusters + 7) / 8); - - buffer_unmap(&qvk.buf_vertex_bsp_staging); - vbo = NULL; - - return VK_SUCCESS; -} - static int local_light_counts[MAX_MAP_LEAFS]; static int cluster_light_counts[MAX_MAP_LEAFS]; static int light_list_tails[MAX_MAP_LEAFS]; @@ -301,7 +613,6 @@ copy_light(const light_poly_t* light, float* vblight, const float* sky_radiance) vblight[15] = 0.f; } -extern vkpt_refdef_t vkpt_refdef; extern char cluster_debug_mask[VIS_MAX_BYTES]; VkResult @@ -403,6 +714,7 @@ vkpt_light_buffer_upload_to_staging(bool render_world, bsp_mesh_t *bsp_mesh, bsp } memcpy(lbo->cluster_debug_mask, cluster_debug_mask, MAX_LIGHT_LISTS / 8); + memcpy(lbo->sky_visibility, bsp_mesh->sky_visibility, MAX_LIGHT_LISTS / 
8); buffer_unmap(staging); lbo = NULL; @@ -420,9 +732,9 @@ static void write_model_vbo_descriptor(int index, VkBuffer buffer, VkDeviceSize VkWriteDescriptorSet write_descriptor_set = { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, - .dstSet = qvk.desc_set_model_vbos, + .dstSet = qvk.desc_set_vertex_buffer, .dstBinding = 0, - .dstArrayElement = index, + .dstArrayElement = VERTEX_BUFFER_FIRST_MODEL + index, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, .pBufferInfo = &descriptor_buffer_info, @@ -431,11 +743,160 @@ static void write_model_vbo_descriptor(int index, VkBuffer buffer, VkDeviceSize vkUpdateDescriptorSets(qvk.device, 1, &write_descriptor_set, 0, NULL); } +static void destroy_model_vbo(model_vbo_t* vbo) +{ + vkpt_destroy_model_geometry(&vbo->geom_opaque); + vkpt_destroy_model_geometry(&vbo->geom_transparent); + vkpt_destroy_model_geometry(&vbo->geom_masked); + + buffer_destroy(&vbo->buffer); + + memset(vbo, 0, sizeof(model_vbo_t)); +} + +static void +stage_mesh_primitives(uint8_t* staging_data, int* p_write_ptr, float** p_vertex_write_ptr, const model_t* model, const maliasmesh_t* m) +{ + int write_ptr = *p_write_ptr; + float* vertex_write_ptr = *p_vertex_write_ptr; + + for (int frame = 0; frame < model->numframes; frame++) + { + for (int tri = 0; tri < m->numtris; tri++) + { + VboPrimitive* dst = (VboPrimitive*)staging_data + write_ptr; + + int i0 = m->indices[tri * 3 + 0]; + int i1 = m->indices[tri * 3 + 1]; + int i2 = m->indices[tri * 3 + 2]; + + i0 += frame * m->numverts; + i1 += frame * m->numverts; + i2 += frame * m->numverts; + + VectorCopy(m->positions[i0], dst->pos0); + VectorCopy(m->positions[i1], dst->pos1); + VectorCopy(m->positions[i2], dst->pos2); + + if (vertex_write_ptr) + { + VectorCopy(m->positions[i0], vertex_write_ptr); vertex_write_ptr += 3; + VectorCopy(m->positions[i1], vertex_write_ptr); vertex_write_ptr += 3; + VectorCopy(m->positions[i2], vertex_write_ptr); vertex_write_ptr += 3; + } + + 
dst->normals[0] = encode_normal(m->normals[i0]); + dst->normals[1] = encode_normal(m->normals[i1]); + dst->normals[2] = encode_normal(m->normals[i2]); + + dst->tangents[0] = encode_normal(m->tangents[i0]); + dst->tangents[1] = encode_normal(m->tangents[i1]); + dst->tangents[2] = encode_normal(m->tangents[i2]); + + dst->uv0[0] = m->tex_coords[i0][0]; + dst->uv0[1] = m->tex_coords[i0][1]; + dst->uv1[0] = m->tex_coords[i1][0]; + dst->uv1[1] = m->tex_coords[i1][1]; + dst->uv2[0] = m->tex_coords[i2][0]; + dst->uv2[1] = m->tex_coords[i2][1]; + + if (m->blend_indices && m->blend_weights) + { + dst->custom0[0] = m->blend_indices[i0]; + dst->custom0[1] = m->blend_weights[i0]; + dst->custom1[0] = m->blend_indices[i1]; + dst->custom1[1] = m->blend_weights[i1]; + dst->custom2[0] = m->blend_indices[i2]; + dst->custom2[1] = m->blend_weights[i2]; + } + + dst->emissive_and_alpha = 0x3c003c00; // (1.0f, 1.0f) + dst->cluster = -1; + + ++write_ptr; + } + } + + *p_write_ptr = write_ptr; + *p_vertex_write_ptr = vertex_write_ptr; + +#if 0 + for (int j = 0; j < num_verts; j++) + Com_Printf("%f %f %f\n", + m->positions[j][0], + m->positions[j][1], + m->positions[j][2]); + + for (int j = 0; j < m->numtris; j++) + Com_Printf("%d %d %d\n", + m->indices[j * 3 + 0], + m->indices[j * 3 + 1], + m->indices[j * 3 + 2]); +#endif + +#if 0 + char buf[1024]; + snprintf(buf, sizeof buf, "model_%04d.obj", i); + FILE* f = fopen(buf, "wb+"); + assert(f); + for (int j = 0; j < m->numverts; j++) { + fprintf(f, "v %f %f %f\n", + m->positions[j][0], + m->positions[j][1], + m->positions[j][2]); + } + for (int j = 0; j < m->numindices / 3; j++) { + fprintf(f, "f %d %d %d\n", + m->indices[j * 3 + 0] + 1, + m->indices[j * 3 + 1] + 1, + m->indices[j * 3 + 2] + 1); + } + fclose(f); +#endif +} + +void vkpt_vertex_buffer_invalidate_static_model_vbos(int material_index) +{ + vkDeviceWaitIdle(qvk.device); + + pbr_material_t* mat = MAT_ForIndex(material_index); + + for (int i = 0; i < MAX_MODELS; i++) + { + const 
model_t* model = &r_models[i]; + model_vbo_t* vbo = model_vertex_data + i; + + // Only look at valid static meshes. + // Animated meshes don't need to be updated when their materials change because + // they don't have prebuilt and pre-categorized BLAS. + if (model->meshes && vbo->is_static) + { + // Look for the material being used in any of the meshes of this model + bool found = false; + for (int i_mesh = 0; i_mesh < model->nummeshes; i_mesh++) + { + maliasmesh_t* mesh = model->meshes + i_mesh; + + if (mesh->materials[0] == mat) + { + found = true; + break; + } + } + + // Invalidate and later re-upload the VBO if the material is used + if (found) + { + write_model_vbo_descriptor(i, null_buffer.buffer, null_buffer.size); + destroy_model_vbo(vbo); + } + } + } +} + VkResult vkpt_vertex_buffer_upload_models() { - int idx_offset = 0; - int vertex_offset = 0; bool any_models_to_upload = false; for(int i = 0; i < MAX_MODELS; i++) @@ -446,8 +907,7 @@ vkpt_vertex_buffer_upload_models() if (!model->meshes && vbo->buffer.buffer) { // model unloaded, destroy the VBO write_model_vbo_descriptor(i, null_buffer.buffer, null_buffer.size); - buffer_destroy(&vbo->buffer); - vbo->registration_sequence = 0; + destroy_model_vbo(vbo); //Com_Printf("Unloaded model[%d]\n", i); continue; } @@ -457,133 +917,141 @@ vkpt_vertex_buffer_upload_models() continue; } - if (model->registration_sequence <= vbo->registration_sequence) { + if (model->registration_sequence <= vbo->registration_sequence && vbo->buffer.buffer) { // VBO is valid, nothing to do continue; } - //Com_Printf("Loading model[%d] %s\n", i, model->name); + // Destroy the old buffers if they exist. + // This may happen when a model is unloaded and then another model + // is loaded in the same slot when changing a map. 
+ destroy_model_vbo(vbo); + + memset(vbo, 0, sizeof(model_vbo_t)); assert(model->numframes > 0); - int model_vertices = 0; - int model_indices = 0; - for (int nmesh = 0; nmesh < model->nummeshes; nmesh++) - { - maliasmesh_t *m = model->meshes + nmesh; - int num_verts = model->numframes * m->numverts; + bool model_is_static = model->numframes == 1 && (!model->iqmData || !model->iqmData->blend_indices); + vbo->is_static = model_is_static; + vbo->total_tris = 0; - model_vertices += num_verts; - model_indices += m->numindices; - } + if (model_is_static) + { + // Count the geometries of all supported kinds - size_t vbo_size = model_indices * sizeof(uint32_t); - if (model->iqmData) - vbo_size += model_vertices * sizeof(iqm_vertex_t); - else - vbo_size += model_vertices * sizeof(model_vertex_t); + uint32_t geom_count_opaque = 0; + uint32_t geom_count_transparent = 0; + uint32_t geom_count_masked = 0; - buffer_create(&vbo->buffer, vbo_size, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + for (int nmesh = 0; nmesh < model->nummeshes; nmesh++) + { + maliasmesh_t* m = model->meshes + nmesh; - buffer_create(&vbo->staging_buffer, vbo_size, - VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + if (MAT_IsTransparent(m->materials[0]->flags)) + { + ++geom_count_transparent; + } + else if (MAT_IsMasked(m->materials[0]->flags)) + { + ++geom_count_masked; + } + else + { + ++geom_count_opaque; + } + } - uint32_t* staging_data = (uint32_t*)buffer_map(&vbo->staging_buffer); - int write_ptr = 0; + vkpt_init_model_geometry(&vbo->geom_opaque, geom_count_opaque); + vkpt_init_model_geometry(&vbo->geom_transparent, geom_count_transparent); + vkpt_init_model_geometry(&vbo->geom_masked, geom_count_masked); + } + // Count the triangles and create the geometry descriptors for static geometries. 
+ // *Note*: the descriptor creation depends on the running value of vbo->total_tris. for (int nmesh = 0; nmesh < model->nummeshes; nmesh++) { maliasmesh_t *m = model->meshes + nmesh; - assert(m->numverts > 0); - - m->vertex_offset = write_ptr; - - int num_verts = model->numframes * m->numverts; - - if (model->iqmData) + if (model_is_static) { - for (int nvert = 0; nvert < num_verts; nvert++) + if (MAT_IsTransparent(m->materials[0]->flags)) { - iqm_vertex_t* vtx = (iqm_vertex_t*)(staging_data + write_ptr) + nvert; - memcpy(vtx->position, m->positions + nvert, sizeof(vec3_t)); - memcpy(vtx->normal, m->normals + nvert, sizeof(vec3_t)); - memcpy(vtx->texcoord, m->tex_coords + nvert, sizeof(vec2_t)); - - if (m->tangents) - memcpy(vtx->tangent, m->tangents + nvert, sizeof(vec3_t)); - else - VectorSet(vtx->tangent, 0.f, 0.f, 0.f); - - if (m->blend_indices && m->blend_weights) - { - vtx->blend_indices = m->blend_indices[nvert]; - memcpy(vtx->blend_weights, m->blend_weights + nvert, sizeof(vec4_t)); - } - else - { - vtx->blend_indices = 0; - Vector4Set(vtx->blend_weights, 0.f, 0.f, 0.f, 0.f); - } + vkpt_append_model_geometry(&vbo->geom_transparent, m->numtris, vbo->total_tris, model->name); } - - write_ptr += num_verts * (int)(sizeof(iqm_vertex_t) / sizeof(uint32_t)); - } - else - { - for (int nvert = 0; nvert < num_verts; nvert++) + else if (MAT_IsMasked(m->materials[0]->flags)) { - model_vertex_t* vtx = (model_vertex_t*)(staging_data + write_ptr) + nvert; - memcpy(vtx->position, m->positions + nvert, sizeof(vec3_t)); - memcpy(vtx->normal, m->normals + nvert, sizeof(vec3_t)); - memcpy(vtx->texcoord, m->tex_coords + nvert, sizeof(vec2_t)); + vkpt_append_model_geometry(&vbo->geom_masked, m->numtris, vbo->total_tris, model->name); + } + else + { + vkpt_append_model_geometry(&vbo->geom_opaque, m->numtris, vbo->total_tris, model->name); } - - write_ptr += num_verts * (int)(sizeof(model_vertex_t) / sizeof(uint32_t)); } - - m->idx_offset = write_ptr; - memcpy(staging_data + 
write_ptr, m->indices, sizeof(uint32_t) * m->numindices); + vbo->total_tris += m->numtris * model->numframes; + } - write_ptr += m->numindices; + vbo->vertex_data_offset = 0; -#if 0 - for (int j = 0; j < num_verts; j++) - Com_Printf("%f %f %f\n", - m->positions[j][0], - m->positions[j][1], - m->positions[j][2]); - - for (int j = 0; j < m->numtris; j++) - Com_Printf("%d %d %d\n", - m->indices[j * 3 + 0], - m->indices[j * 3 + 1], - m->indices[j * 3 + 2]); -#endif + size_t vbo_size = vbo->total_tris * sizeof(VboPrimitive); + size_t staging_size = vbo_size; + + if (model_is_static) + { + vbo->vertex_data_offset = vbo_size; + vbo_size += vbo->total_tris * sizeof(vec3) * 3; + staging_size = vbo_size; -#if 0 - char buf[1024]; - snprintf(buf, sizeof buf, "model_%04d.obj", i); - FILE *f = fopen(buf, "wb+"); - assert(f); - for (int j = 0; j < m->numverts; j++) { - fprintf(f, "v %f %f %f\n", - m->positions[j][0], - m->positions[j][1], - m->positions[j][2]); - } - for (int j = 0; j < m->numindices / 3; j++) { - fprintf(f, "f %d %d %d\n", - m->indices[j * 3 + 0] + 1, - m->indices[j * 3 + 1] + 1, - m->indices[j * 3 + 2] + 1); - } - fclose(f); -#endif + suballocate_model_blas_memory(&vbo->geom_opaque, &vbo_size, model->name); + suballocate_model_blas_memory(&vbo->geom_masked, &vbo_size, model->name); + suballocate_model_blas_memory(&vbo->geom_transparent, &vbo_size, model->name); + } + + const VkBufferUsageFlags accel_usage = + VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR | + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; + + buffer_create(&vbo->buffer, vbo_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | + (model_is_static ? 
accel_usage : 0), + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + buffer_create(&vbo->staging_buffer, staging_size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); + + ATTACH_LABEL_VARIABLE_NAME(vbo->buffer.buffer, BUFFER, model->name); + ATTACH_LABEL_VARIABLE_NAME(vbo->buffer.memory, DEVICE_MEMORY, model->name); + + if (model_is_static) + { + char name[MAX_QPATH + 16]; + + Q_snprintf(name, sizeof(name), "%s:opaque", model->name); + create_model_blas(&vbo->geom_opaque, vbo->buffer.buffer, name); + + Q_snprintf(name, sizeof(name), "%s:masked", model->name); + create_model_blas(&vbo->geom_masked, vbo->buffer.buffer, name); + + Q_snprintf(name, sizeof(name), "%s:transparent", model->name); + create_model_blas(&vbo->geom_transparent, vbo->buffer.buffer, name); + } + + uint8_t* staging_data = buffer_map(&vbo->staging_buffer); + memset(staging_data, 0, vbo->staging_buffer.size); + int write_ptr = 0; + float* vertex_write_ptr = model_is_static ? 
(float*)(staging_data + vbo->vertex_data_offset) : NULL; + + for (int nmesh = 0; nmesh < model->nummeshes; nmesh++) + { + maliasmesh_t* m = model->meshes + nmesh; + + m->tri_offset = write_ptr; + + stage_mesh_primitives(staging_data, &write_ptr, &vertex_write_ptr, model, m); } buffer_unmap(&vbo->staging_buffer); @@ -607,6 +1075,33 @@ vkpt_vertex_buffer_upload_models() }; vkCmdCopyBuffer(cmd_buf, vbo->staging_buffer.buffer, vbo->buffer.buffer, 1, &copyRegion); + + if (vbo->is_static) + { + BUFFER_BARRIER(cmd_buf, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR | VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR, + .buffer = vbo->buffer.buffer, + .offset = 0, + .size = VK_WHOLE_SIZE); + } + + build_model_blas(cmd_buf, &vbo->geom_opaque, vbo->vertex_data_offset, &vbo->buffer); + build_model_blas(cmd_buf, &vbo->geom_transparent, vbo->vertex_data_offset, &vbo->buffer); + build_model_blas(cmd_buf, &vbo->geom_masked, vbo->vertex_data_offset, &vbo->buffer); + + vbo->geom_opaque.instance_mask = AS_FLAG_OPAQUE; + vbo->geom_opaque.instance_flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR; + vbo->geom_opaque.sbt_offset = SBTO_OPAQUE; + + vbo->geom_transparent.instance_mask = AS_FLAG_TRANSPARENT; + vbo->geom_transparent.instance_flags = VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR; + vbo->geom_transparent.sbt_offset = SBTO_OPAQUE; + + vbo->geom_masked.instance_mask = AS_FLAG_OPAQUE; + vbo->geom_masked.instance_flags = VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR | VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR; + vbo->geom_masked.sbt_offset = SBTO_MASKED; + } } @@ -631,20 +1126,84 @@ vkpt_vertex_buffer_upload_models() return VK_SUCCESS; } +void create_primbuf() +{ + int primbuf_size = Cvar_ClampInteger(cvar_pt_primbuf, PRIMBUF_SIZE_MIN, PRIMBUF_SIZE_MAX); + + buffer_create(&qvk.buf_primitive_instanced, sizeof(VboPrimitive) * primbuf_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | 
VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + buffer_create(&qvk.buf_positions_instanced, sizeof(prim_positions_t) * primbuf_size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + + VkDescriptorBufferInfo buf_info = { 0 }; + + VkWriteDescriptorSet output_buf_write = { + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = qvk.desc_set_vertex_buffer, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .pBufferInfo = &buf_info, + }; + + output_buf_write.dstBinding = PRIMITIVE_BUFFER_BINDING_IDX; + output_buf_write.dstArrayElement = VERTEX_BUFFER_INSTANCED; + buf_info.buffer = qvk.buf_primitive_instanced.buffer; + buf_info.range = qvk.buf_primitive_instanced.size; + vkUpdateDescriptorSets(qvk.device, 1, &output_buf_write, 0, NULL); + + output_buf_write.dstBinding = POSITION_BUFFER_BINDING_IDX; + output_buf_write.dstArrayElement = 0; + buf_info.buffer = qvk.buf_positions_instanced.buffer; + buf_info.range = qvk.buf_positions_instanced.size; + vkUpdateDescriptorSets(qvk.device, 1, &output_buf_write, 0, NULL); + + current_primbuf_size = primbuf_size; +} + +void destroy_primbuf() +{ + buffer_destroy(&qvk.buf_primitive_instanced); + buffer_destroy(&qvk.buf_positions_instanced); +} + +void vkpt_vertex_buffer_ensure_primbuf_size(uint32_t prim_count) +{ + if (prim_count <= current_primbuf_size) + return; + + vkDeviceWaitIdle(qvk.device); + + destroy_primbuf(); + + prim_count = (uint32_t)align(prim_count, PRIMBUF_SIZE_MIN); + Cvar_SetInteger(cvar_pt_primbuf, (int)prim_count, FROM_CODE); + + Com_DPrintf("Resizing the animation buffers to fit all meshes. 
Set pt_primbuf to at least %d to avoid this.\n", prim_count); + + create_primbuf(); +} + VkResult vkpt_vertex_buffer_create() { + char primbuf_initial_value[16]; + Q_snprintf(primbuf_initial_value, sizeof(primbuf_initial_value), "%d", PRIMBUF_SIZE_DEFAULT); + cvar_pt_primbuf = Cvar_Get("pt_primbuf", primbuf_initial_value, CVAR_ARCHIVE); + VkDescriptorSetLayoutBinding vbo_layout_bindings[] = { { .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = 1, - .binding = BSP_VERTEX_BUFFER_BINDING_IDX, + .descriptorCount = VERTEX_BUFFER_FIRST_MODEL + MAX_MODELS, + .binding = PRIMITIVE_BUFFER_BINDING_IDX, .stageFlags = VK_SHADER_STAGE_ALL, }, { .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, - .binding = MODEL_DYNAMIC_VERTEX_BUFFER_BINDING_IDX, + .binding = POSITION_BUFFER_BINDING_IDX, .stageFlags = VK_SHADER_STAGE_ALL, }, { @@ -698,19 +1257,7 @@ vkpt_vertex_buffer_create() }; _VK(vkCreateDescriptorSetLayout(qvk.device, &layout_info, NULL, &qvk.desc_set_layout_vertex_buffer)); - - buffer_create(&qvk.buf_vertex_bsp, sizeof(BspVertexBuffer), - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - - buffer_create(&qvk.buf_vertex_bsp_staging, sizeof(BspVertexBuffer), - VK_BUFFER_USAGE_TRANSFER_SRC_BIT, - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); - - buffer_create(&qvk.buf_vertex_model_dynamic, sizeof(ModelDynamicVertexBuffer), - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR | VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT, - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - + buffer_create(&qvk.buf_light, sizeof(LightBuffer), VK_BUFFER_USAGE_TRANSFER_DST_BIT | 
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); @@ -755,6 +1302,8 @@ vkpt_vertex_buffer_create() VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT); } + buffer_create(&null_buffer, 4, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + VkDescriptorPoolSize pool_size = { .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = LENGTH(vbo_layout_bindings) + MAX_MODELS + 128, @@ -779,29 +1328,25 @@ vkpt_vertex_buffer_create() _VK(vkAllocateDescriptorSets(qvk.device, &descriptor_set_alloc_info, &qvk.desc_set_vertex_buffer)); VkDescriptorBufferInfo buf_info = { - .buffer = qvk.buf_vertex_bsp.buffer, + .buffer = null_buffer.buffer, .offset = 0, - .range = sizeof(BspVertexBuffer), + .range = null_buffer.size, }; VkWriteDescriptorSet output_buf_write = { .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, .dstSet = qvk.desc_set_vertex_buffer, - .dstBinding = BSP_VERTEX_BUFFER_BINDING_IDX, - .dstArrayElement = 0, + .dstBinding = PRIMITIVE_BUFFER_BINDING_IDX, + .dstArrayElement = VERTEX_BUFFER_WORLD, .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = 1, .pBufferInfo = &buf_info, }; vkUpdateDescriptorSets(qvk.device, 1, &output_buf_write, 0, NULL); - - output_buf_write.dstBinding = MODEL_DYNAMIC_VERTEX_BUFFER_BINDING_IDX; - buf_info.buffer = qvk.buf_vertex_model_dynamic.buffer; - buf_info.range = sizeof(ModelDynamicVertexBuffer); - vkUpdateDescriptorSets(qvk.device, 1, &output_buf_write, 0, NULL); - + output_buf_write.dstBinding = LIGHT_BUFFER_BINDING_IDX; + output_buf_write.dstArrayElement = 0; buf_info.buffer = qvk.buf_light.buffer; buf_info.range = sizeof(LightBuffer); vkUpdateDescriptorSets(qvk.device, 1, &output_buf_write, 0, NULL); @@ -832,35 +1377,10 @@ vkpt_vertex_buffer_create() buf_info.range = sizeof(SunColorBuffer); vkUpdateDescriptorSets(qvk.device, 1, &output_buf_write, 0, NULL); - - VkDescriptorSetLayoutBinding model_vbo_layout_binding = { - .descriptorType = 
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, - .descriptorCount = MAX_MODELS, - .binding = 0, - .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT - }; - - VkDescriptorSetLayoutCreateInfo model_vbo_layout_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, - .bindingCount = 1, - .pBindings = &model_vbo_layout_binding, - }; - - _VK(vkCreateDescriptorSetLayout(qvk.device, &model_vbo_layout_info, NULL, &qvk.desc_set_layout_model_vbos)); - - VkDescriptorSetAllocateInfo model_vbo_set_info = { - .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, - .descriptorPool = desc_pool_vertex_buffer, - .descriptorSetCount = 1, - .pSetLayouts = &qvk.desc_set_layout_model_vbos, - }; - - _VK(vkAllocateDescriptorSets(qvk.device, &model_vbo_set_info, &qvk.desc_set_model_vbos)); - + create_primbuf(); + memset(model_vertex_data, 0, sizeof(model_vertex_data)); - buffer_create(&null_buffer, 4, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - for (int i = 0; i < MAX_MODELS; i++) { write_model_vbo_descriptor(i, null_buffer.buffer, null_buffer.size); @@ -893,20 +1413,16 @@ vkpt_vertex_buffer_destroy() desc_pool_vertex_buffer = VK_NULL_HANDLE; qvk.desc_set_layout_vertex_buffer = VK_NULL_HANDLE; - vkDestroyDescriptorSetLayout(qvk.device, qvk.desc_set_layout_model_vbos, NULL); - qvk.desc_set_layout_model_vbos = VK_NULL_HANDLE; + destroy_primbuf(); for (int model = 0; model < MAX_MODELS; model++) { - buffer_destroy(&model_vertex_data[model].buffer); + destroy_model_vbo(&model_vertex_data[model]); } buffer_destroy(&null_buffer); - buffer_destroy(&qvk.buf_vertex_bsp); - buffer_destroy(&qvk.buf_vertex_bsp_staging); - buffer_destroy(&qvk.buf_vertex_model_dynamic); - + buffer_destroy(&qvk.buf_world); buffer_destroy(&qvk.buf_light); buffer_destroy(&qvk.buf_iqm_matrices); buffer_destroy(&qvk.buf_readback); @@ -999,8 +1515,7 @@ vkpt_vertex_buffer_create_pipelines() VkDescriptorSetLayout desc_set_layouts[] = { qvk.desc_set_layout_ubo, - qvk.desc_set_layout_vertex_buffer, 
- qvk.desc_set_layout_model_vbos + qvk.desc_set_layout_vertex_buffer }; CREATE_PIPELINE_LAYOUT(qvk.device, &pipeline_layout_instance_geometry, @@ -1053,8 +1568,7 @@ vkpt_instance_geometry(VkCommandBuffer cmd_buf, uint32_t num_instances, bool upd { VkDescriptorSet desc_sets[] = { qvk.desc_set_ubo, - qvk.desc_set_vertex_buffer, - qvk.desc_set_model_vbos + qvk.desc_set_vertex_buffer }; vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_instance_geometry); vkCmdBindDescriptorSets(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, @@ -1066,17 +1580,25 @@ vkpt_instance_geometry(VkCommandBuffer cmd_buf, uint32_t num_instances, bool upd { vkCmdBindPipeline(cmd_buf, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_animate_materials); - int num_groups = (((vkpt_refdef.bsp_mesh_world.world_idx_count + vkpt_refdef.bsp_mesh_world.world_transparent_count - + vkpt_refdef.bsp_mesh_world.world_masked_count) / 3) + 255) / 256; - vkCmdDispatch(cmd_buf, num_groups, 1, 1); + const bsp_mesh_t* wm = &vkpt_refdef.bsp_mesh_world; + uint32_t num_static_primitives = 0; + if (wm->geom_opaque.prim_counts) num_static_primitives += wm->geom_opaque.prim_counts[0]; + if (wm->geom_transparent.prim_counts) num_static_primitives += wm->geom_transparent.prim_counts[0]; + if (wm->geom_masked.prim_counts) num_static_primitives += wm->geom_masked.prim_counts[0]; + + if (num_static_primitives != 0) + { + uint num_groups = (num_static_primitives + 255) / 256; + vkCmdDispatch(cmd_buf, num_groups, 1, 1); + } } VkBufferMemoryBarrier barrier = { .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .buffer = qvk.buf_vertex_model_dynamic.buffer, - .size = qvk.buf_vertex_model_dynamic.size, + .buffer = qvk.buf_primitive_instanced.buffer, + .size = qvk.buf_primitive_instanced.size, .srcQueueFamilyIndex = qvk.queue_idx_graphics, .dstQueueFamilyIndex = qvk.queue_idx_graphics }; @@ -1093,4 +1615,25 @@ 
vkpt_instance_geometry(VkCommandBuffer cmd_buf, uint32_t num_instances, bool upd return VK_SUCCESS; } +bool vkpt_model_is_static(const model_t* model) +{ + if (!model) + return false; + + size_t model_index = model - r_models; + const model_vbo_t* vbo = &model_vertex_data[model_index]; + + return vbo->is_static; +} + +const model_vbo_t* vkpt_get_model_vbo(const model_t* model) +{ + if (!model) + return NULL; + + size_t model_index = model - r_models; + + return &model_vertex_data[model_index]; +} + // vim: shiftwidth=4 noexpandtab tabstop=4 cindent diff --git a/src/refresh/vkpt/vk_util.c b/src/refresh/vkpt/vk_util.c index 08e6a66c7..0c40d13ee 100644 --- a/src/refresh/vkpt/vk_util.c +++ b/src/refresh/vkpt/vk_util.c @@ -147,10 +147,10 @@ VkResult buffer_destroy(BufferResource_t *buf) { assert(!buf->is_mapped); + if (buf->buffer != VK_NULL_HANDLE) + vkDestroyBuffer(qvk.device, buf->buffer, NULL); if(buf->memory != VK_NULL_HANDLE) vkFreeMemory(qvk.device, buf->memory, NULL); - if(buf->buffer != VK_NULL_HANDLE) - vkDestroyBuffer(qvk.device, buf->buffer, NULL); buf->buffer = VK_NULL_HANDLE; buf->memory = VK_NULL_HANDLE; buf->size = 0; diff --git a/src/refresh/vkpt/vkpt.h b/src/refresh/vkpt/vkpt.h index 14adb7743..577515ccf 100644 --- a/src/refresh/vkpt/vkpt.h +++ b/src/refresh/vkpt/vkpt.h @@ -212,7 +212,6 @@ typedef struct QVK_s { VkDebugUtilsMessengerEXT dbg_messenger; VkFence fences_frame_sync[MAX_FRAMES_IN_FLIGHT]; - VkFence fence_vertex_sync; int win_width; @@ -251,13 +250,10 @@ typedef struct QVK_s { VkDescriptorSetLayout desc_set_layout_vertex_buffer; VkDescriptorSet desc_set_vertex_buffer; - - VkDescriptorSetLayout desc_set_layout_model_vbos; - VkDescriptorSet desc_set_model_vbos; - - BufferResource_t buf_vertex_bsp; - BufferResource_t buf_vertex_bsp_staging; - BufferResource_t buf_vertex_model_dynamic; + + BufferResource_t buf_world; + BufferResource_t buf_primitive_instanced; + BufferResource_t buf_positions_instanced; BufferResource_t buf_light; 
BufferResource_t buf_light_staging[MAX_FRAMES_IN_FLIGHT]; @@ -278,7 +274,7 @@ typedef struct QVK_s { tex_sampler_nearest, tex_sampler_nearest_mipmap_aniso, tex_sampler_linear_clamp; - + float sintab[256]; VkImage screenshot_image; @@ -326,9 +322,42 @@ LIST_EXTENSIONS_INSTANCE #define MAX_SKY_CLUSTERS 1024 -typedef struct bsp_model_s { - uint32_t idx_offset; - uint32_t idx_count; +typedef mat3 prim_positions_t; + +typedef struct +{ + uint8_t* geometry_storage; + VkAccelerationStructureGeometryKHR* geometries; + VkAccelerationStructureBuildRangeInfoKHR* build_ranges; + uint32_t* prim_counts; + uint32_t* prim_offsets; + uint32_t num_geometries; + uint32_t max_geometries; + VkAccelerationStructureBuildSizesInfoKHR build_sizes; + VkDeviceSize blas_data_offset; + VkAccelerationStructureKHR accel; + VkDeviceAddress blas_device_address; + VkGeometryInstanceFlagsKHR instance_flags; + uint32_t instance_mask; + uint32_t sbt_offset; +} model_geometry_t; + +typedef struct { + BufferResource_t buffer; + BufferResource_t staging_buffer; + int registration_sequence; + model_geometry_t geom_opaque; + model_geometry_t geom_transparent; + model_geometry_t geom_masked; + size_t vertex_data_offset; + uint32_t total_tris; + bool is_static; +} model_vbo_t; + +typedef struct +{ + model_geometry_t geometry; + vec3_t center; vec3_t aabb_min; vec3_t aabb_max; @@ -347,36 +376,23 @@ typedef struct aabb_s { } aabb_t; typedef struct bsp_mesh_s { - uint32_t world_idx_count; bsp_model_t *models; int num_models; aabb_t world_aabb; - uint32_t world_transparent_offset; - uint32_t world_transparent_count; - - uint32_t world_masked_offset; - uint32_t world_masked_count; - - uint32_t world_sky_offset; - uint32_t world_sky_count; - - uint32_t world_custom_sky_offset; - uint32_t world_custom_sky_count; + VboPrimitive* primitives; + uint32_t num_primitives_allocated; + uint32_t num_primitives; + size_t vertex_data_offset; - float *positions, *tex_coords; - uint32_t* normals; - uint32_t* tangents; - int 
*indices; - uint32_t *materials; - float *texel_density; - float *emissive_factors; - int num_indices; - int num_vertices; + model_geometry_t geom_opaque; + model_geometry_t geom_transparent; + model_geometry_t geom_masked; + model_geometry_t geom_sky; + model_geometry_t geom_custom_sky; int num_clusters; - int *clusters; int num_cluster_lights; int *cluster_light_offsets; @@ -402,10 +418,11 @@ void bsp_mesh_create_from_bsp(bsp_mesh_t *wm, bsp_t *bsp, const char* map_name); void bsp_mesh_destroy(bsp_mesh_t *wm); void bsp_mesh_register_textures(bsp_t *bsp); void bsp_mesh_animate_light_polys(bsp_mesh_t *wm); +uint32_t encode_normal(const vec3_t normal); typedef struct vkpt_refdef_s { QVKUniformBuffer_t uniform_buffer; - QVKInstanceBuffer_t uniform_instance_buffer; + InstanceBuffer uniform_instance_buffer; refdef_t *fd; float view_matrix[16]; float projection_matrix[16]; @@ -424,6 +441,8 @@ typedef struct vkpt_refdef_s { extern vkpt_refdef_t vkpt_refdef; +extern BufferResource_t buf_accel_scratch; + typedef struct sun_light_s { vec3_t direction; vec3_t direction_envmap; @@ -488,18 +507,18 @@ typedef enum { typedef struct EntityUploadInfo { uint32_t num_instances; - uint32_t num_vertices; - uint32_t dynamic_vertex_num; - uint32_t transparent_model_vertex_offset; - uint32_t transparent_model_vertex_num; - uint32_t masked_model_vertex_offset; - uint32_t masked_model_vertex_num; - uint32_t viewer_model_vertex_offset; - uint32_t viewer_model_vertex_num; - uint32_t viewer_weapon_vertex_offset; - uint32_t viewer_weapon_vertex_num; - uint32_t explosions_vertex_offset; - uint32_t explosions_vertex_num; + uint32_t num_prims; + uint32_t opqaue_prim_count; + uint32_t transparent_prim_offset; + uint32_t transparent_prim_count; + uint32_t masked_prim_offset; + uint32_t masked_prim_count; + uint32_t viewer_model_prim_offset; + uint32_t viewer_model_prim_count; + uint32_t viewer_weapon_prim_offset; + uint32_t viewer_weapon_prim_count; + uint32_t explosions_prim_offset; + uint32_t 
explosions_prim_count; bool weapon_left_handed; } EntityUploadInfo; @@ -588,21 +607,29 @@ VkResult vkpt_draw_clear_stretch_pics(); VkResult vkpt_uniform_buffer_create(); VkResult vkpt_uniform_buffer_destroy(); -VkResult vkpt_uniform_buffer_update(VkCommandBuffer command_buffer); +VkResult vkpt_uniform_buffer_upload_to_staging(); +void vkpt_uniform_buffer_copy_from_staging(VkCommandBuffer command_buffer); +void vkpt_init_model_geometry(model_geometry_t* info, uint32_t max_geometries); +void vkpt_destroy_model_geometry(model_geometry_t* info); +void vkpt_append_model_geometry(model_geometry_t* info, uint32_t num_prims, uint32_t prim_offset, const char* model_name); VkResult vkpt_vertex_buffer_create(); VkResult vkpt_vertex_buffer_destroy(); -VkResult vkpt_vertex_buffer_upload_bsp_mesh_to_staging(bsp_mesh_t *bsp_mesh); +void vkpt_vertex_buffer_ensure_primbuf_size(uint32_t prim_count); +VkResult vkpt_vertex_buffer_upload_bsp_mesh(bsp_mesh_t* bsp_mesh); +void vkpt_vertex_buffer_cleanup_bsp_mesh(bsp_mesh_t *bsp_mesh); VkResult vkpt_vertex_buffer_create_pipelines(); VkResult vkpt_vertex_buffer_destroy_pipelines(); VkResult vkpt_instance_geometry(VkCommandBuffer cmd_buf, uint32_t num_instances, bool update_world_animations); +void vkpt_vertex_buffer_invalidate_static_model_vbos(int material_index); VkResult vkpt_vertex_buffer_upload_models(); -VkResult vkpt_vertex_buffer_bsp_upload_staging(); void vkpt_light_buffer_reset_counts(); VkResult vkpt_light_buffer_upload_to_staging(bool render_world, bsp_mesh_t *bsp_mesh, bsp_t* bsp, int num_model_lights, light_poly_t* transformed_model_lights, const float* sky_radiance); VkResult vkpt_light_buffer_upload_staging(VkCommandBuffer cmd_buf); VkResult vkpt_light_stats_create(bsp_mesh_t *bsp_mesh); VkResult vkpt_light_stats_destroy(); +bool vkpt_model_is_static(const model_t* model); +const model_vbo_t* vkpt_get_model_vbo(const model_t* model); VkResult vkpt_iqm_matrix_buffer_upload_staging(VkCommandBuffer cmd_buf); @@ -616,9 +643,10 
@@ VkResult vkpt_pt_destroy(); VkResult vkpt_pt_create_pipelines(); VkResult vkpt_pt_destroy_pipelines(); -VkResult vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, bool include_world, bool weapon_left_handed); -VkResult vkpt_pt_create_static(int num_vertices, int num_vertices_transparent, int num_vertices_maksed, int num_vertices_sky, int num_vertices_custom_sky); -void vkpt_pt_destroy_static(); +void vkpt_pt_reset_instances(); +void vkpt_pt_instance_model_blas(const model_geometry_t* geom, const mat4 transform, uint32_t buffer_idx, int model_instance_index); + +VkResult vkpt_pt_create_toplevel(VkCommandBuffer cmd_buf, int idx, const EntityUploadInfo* upload_info, bool weapon_left_handed); VkResult vkpt_pt_trace_primary_rays(VkCommandBuffer cmd_buf); VkResult vkpt_pt_trace_reflections(VkCommandBuffer cmd_buf, int bounce); VkResult vkpt_pt_trace_lighting(VkCommandBuffer cmd_buf, float num_bounce_rays); @@ -667,9 +695,15 @@ VkResult vkpt_shadow_map_initialize(); VkResult vkpt_shadow_map_destroy(); VkResult vkpt_shadow_map_create_pipelines(); VkResult vkpt_shadow_map_destroy_pipelines(); -VkResult vkpt_shadow_map_render(VkCommandBuffer cmd_buf, float* view_projection_matrix, int num_static_verts, int num_dynamic_verts, int transparent_offset, int num_transparent_verts); +VkResult vkpt_shadow_map_render(VkCommandBuffer cmd_buf, float* view_projection_matrix, + uint32_t static_offset, uint32_t num_static_verts, + uint32_t dynamic_offset, uint32_t num_dynamic_verts, + uint32_t transparent_offset, uint32_t num_transparent_verts); VkImageView vkpt_shadow_map_get_view(); -void vkpt_shadow_map_setup(const sun_light_t* light, const float* bbox_min, const float* bbox_max, float* VP, float* depth_scale, bool random_sampling); +void vkpt_shadow_map_setup(const sun_light_t* light, const float* bbox_min, const float* bbox_max, + float* VP, float* depth_scale, bool random_sampling); +void vkpt_shadow_map_reset_instances(); +void vkpt_shadow_map_add_instance(const float* 
model_matrix, VkBuffer buffer, size_t vertex_offset, uint32_t prim_count); int load_img(const char *name, image_t *image); // Transparency module API @@ -741,17 +775,17 @@ typedef struct maliasmesh_s { int numverts; int numtris; int numindices; - int idx_offset; /* offset in vertex buffer on device */ - int vertex_offset; /* offset in vertex buffer on device */ + int tri_offset; /* offset in vertex buffer on device */ int *indices; vec3_t *positions; vec3_t *normals; vec2_t *tex_coords; - vec3_t *tangents; // iqm only + vec3_t *tangents; uint32_t *blend_indices; // iqm only - vec4_t *blend_weights; // iqm only + uint32_t *blend_weights; // iqm only struct pbr_material_s *materials[MAX_ALIAS_SKINS]; int numskins; + bool handedness; } maliasmesh_t; // needed for model.c