intel/bvh: Compile and adapt bvh shaders separately into Xe1/2 and Xe3+

This change separates the encode, header, and copy shaders into versions
for Xe1/2 and Xe3+, adding the required compile options and handling the
64-bit version of the instance leaf for Xe3+.

Signed-off-by: Kevin Chuang <kaiwenjon23@gmail.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/33047>
This commit is contained in:
Kevin Chuang 2025-01-31 12:37:44 -08:00 committed by Marge Bot
parent 36433e932b
commit cbc8af4555
5 changed files with 110 additions and 59 deletions

View file

@ -211,24 +211,46 @@ struct anv_internal_node {
#define ANV_INSTANCE_ALL_AABB 0x40
struct instance_leaf_part0 {
/* shader index (24-bits) for software instancing
/* Xe1/2:
* shader index (24-bits) for software instancing
* geometry mask (8-bits) used for ray masking
*
* Xe3+:
* instanceContribution: 24
* geometry mask (8-bits): 8
*/
uint32_t shader_index_and_geom_mask;
uint32_t DW0;
/* instance contribution to hit group index (24-bits)
/* Xe1/2:
* instance contribution to hit group index (24-bits)
* Padding (5-bits)
* DisableOpacityCull (1-bit)
* OpaqueGeometry (1-bit)
* Padding (1-bit)
*
* Xe3+:
* insFlags: 8
* ComparisonMode: 1
* ComparisonValue: 7
* pad0: 8
* subType: 4
* pad1: 1
* DisableOpacityCull: 1
* OpaqueGeometry: 1
* IgnoreRayMultiplier: 1
*/
uint32_t instance_contribution_and_geom_flags;
uint32_t DW1;
/* 48 bit start node of the instanced object
/* Xe1/2:
* 48 bit start node of the instanced object
* instFlags (8-bits)
* Padding (16-bits)
*
* Xe3+:
* 64 bit start node pointer
*/
uint64_t start_node_ptr_and_inst_flags;
uint64_t QW_startNodePtr;
/* 1st row of World2Obj transform */
float world2obj_vx_x;

View file

@ -136,11 +136,15 @@ main(void)
DEREF(instance_leaf).part1.bvh_ptr = blas_ptr;
/* set the startNodePtr to blas_ptr + ANV_RT_BVH_HEADER_SIZE */
uint64_t mask = 0x0000fffffffffffful;
uint64_t new_startNodePtr = blas_ptr + ANV_RT_BVH_HEADER_SIZE;
#if GFX_VERx10 >= 300
DEREF(instance_leaf).part0.QW_startNodePtr = new_startNodePtr;
#else
uint64_t mask = 0x0000fffffffffffful;
/* clear bits and set */
DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags =
(DEREF(instance_leaf).part0.start_node_ptr_and_inst_flags & ~mask) | (new_startNodePtr & mask);
DEREF(instance_leaf).part0.QW_startNodePtr =
(DEREF(instance_leaf).part0.QW_startNodePtr & ~mask) | (new_startNodePtr & mask);
#endif
}
}
}

View file

@ -29,7 +29,7 @@ layout(push_constant) uniform CONSTS {
encode_args args;
};
uint64_t
uint32_t
get_instance_flag(uint32_t src)
{
uint32_t flags = src & 0xff;
@ -129,20 +129,6 @@ encode_leaf_node(uint32_t type, uint64_t src_node, uint64_t dst_node, REF(anv_ac
uint64_t start_node_ptr = uint64_t(src.base_ptr) + DEREF(blas_header).rootNodeOffset;
uint32_t sbt_offset_and_flags = src.sbt_offset_and_flags;
uint32_t shader_index_and_geom_mask = 0;
shader_index_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000);
DEREF(dst_instance).part0.shader_index_and_geom_mask = shader_index_and_geom_mask;
uint32_t instance_contribution_and_geom_flags = 0;
instance_contribution_and_geom_flags |= src.sbt_offset_and_flags & 0xffffff;
instance_contribution_and_geom_flags |= (1 << 29);
instance_contribution_and_geom_flags |=
(get_instance_flag(src.sbt_offset_and_flags >> 24) == ANV_INSTANCE_FLAG_FORCE_OPAQUE ?
ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30;
DEREF(dst_instance).part0.instance_contribution_and_geom_flags =
instance_contribution_and_geom_flags;
uint32_t instance_flags = DEREF(blas_header).instance_flags;
if (((sbt_offset_and_flags >> 24) & (VK_GEOMETRY_INSTANCE_FORCE_OPAQUE_BIT_KHR |
VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR)) != 0) {
@ -152,9 +138,39 @@ encode_leaf_node(uint32_t type, uint64_t src_node, uint64_t dst_node, REF(anv_ac
VK_GEOMETRY_INSTANCE_FORCE_NO_OPAQUE_BIT_KHR);
}
DEREF(dst_instance).part0.start_node_ptr_and_inst_flags =
#if GFX_VERx10 >= 300
DEREF(dst_instance).part0.QW_startNodePtr = start_node_ptr;
uint32_t instance_contribution_and_geom_mask = 0;
instance_contribution_and_geom_mask |= src.sbt_offset_and_flags & 0xffffff;
instance_contribution_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000);
DEREF(dst_instance).part0.DW0 = instance_contribution_and_geom_mask;
uint32_t inst_flags_and_the_rest = 0;
inst_flags_and_the_rest |= get_instance_flag(instance_flags | (src.sbt_offset_and_flags >> 24));
inst_flags_and_the_rest |= (1 << 29);
inst_flags_and_the_rest |=
((get_instance_flag(src.sbt_offset_and_flags >> 24) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ?
ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30;
DEREF(dst_instance).part0.DW1 = inst_flags_and_the_rest;
#else
uint32_t shader_index_and_geom_mask = 0;
shader_index_and_geom_mask |= (src.custom_instance_and_mask & 0xff000000);
DEREF(dst_instance).part0.DW0 = shader_index_and_geom_mask;
uint32_t instance_contribution_and_geom_flags = 0;
instance_contribution_and_geom_flags |= src.sbt_offset_and_flags & 0xffffff;
instance_contribution_and_geom_flags |= (1 << 29);
instance_contribution_and_geom_flags |=
((get_instance_flag(src.sbt_offset_and_flags >> 24) & ANV_INSTANCE_FLAG_FORCE_OPAQUE) != 0 ?
ANV_GEOMETRY_FLAG_OPAQUE : 0) << 30;
DEREF(dst_instance).part0.DW1 = instance_contribution_and_geom_flags;
DEREF(dst_instance).part0.QW_startNodePtr =
(start_node_ptr & ((1ul << 48) - 1)) |
(get_instance_flag(instance_flags | (src.sbt_offset_and_flags >> 24)) << 48);
(uint64_t(get_instance_flag(instance_flags | (src.sbt_offset_and_flags >> 24))) << 48);
#endif
mat4 transform = mat4(src.otw_matrix);

View file

@ -4,21 +4,16 @@
# source file, output name, defines
bvh_shaders = [
[
'encode.comp',
'encode',
[],
],
[
'header.comp',
'header',
[],
],
[
'copy.comp',
'copy',
[]
],
'encode.comp',
'header.comp',
'copy.comp',
]
# A mapping: [filename version, GFX_VERx10 define version]
gfx_versions = [
['125', '125'],
['20', '200'],
['30', '300']
]
anv_bvh_include_dir = dir_source_root + '/src/intel/vulkan/bvh'
@ -29,21 +24,28 @@ anv_bvh_includes = files(
'anv_bvh.h',
)
foreach s : bvh_shaders
command = [
prog_glslang, '-V', '-I' + vk_bvh_include_dir, '-I' + anv_bvh_include_dir, '--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@'
]
command += glslang_quiet
foreach shader : bvh_shaders
foreach gfx: gfx_versions
file_name_ver = gfx[0]
define_ver = gfx[1]
command = [
prog_glslang, '-V', '-I' + vk_bvh_include_dir, '-I' + anv_bvh_include_dir,
'--target-env', 'spirv1.5', '-x', '-o', '@OUTPUT@', '@INPUT@',
'-DGFX_VERx10=' + define_ver # so that we can use this macro inside shaders
]
command += glslang_quiet
foreach define : s[2]
command += '-D' + define
endforeach
shader_name = shader.split('.')[0]
output_name = 'gfx' + file_name_ver + '_' + shader_name + '.spv.h'
# By doing this, encode.comp compiled with -DGFX_VERx10=300 is written to gfx30_encode.spv.h,
# so the genX(encode).spv.h include in genX_acceleration_structure.c can find the right file
bvh_spv += custom_target(
s[1] + '.spv.h',
input : s[0],
output : s[1] + '.spv.h',
command : command,
depend_files: [vk_bvh_includes, anv_bvh_includes],
)
bvh_spv += custom_target(
output_name,
input : shader,
output : output_name,
command : command,
depend_files: [vk_bvh_includes, anv_bvh_includes],
)
endforeach
endforeach

View file

@ -229,16 +229,23 @@ debug_record_as_to_bvh_dump(struct anv_cmd_buffer *cmd_buffer,
}
}
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY(x) STRINGIFY_HELPER(x)
#define ENCODE_SPV_PATH STRINGIFY(bvh/genX(encode).spv.h)
#define HEADER_SPV_PATH STRINGIFY(bvh/genX(header).spv.h)
#define COPY_SPV_PATH STRINGIFY(bvh/genX(copy).spv.h)
static const uint32_t encode_spv[] = {
#include "bvh/encode.spv.h"
#include ENCODE_SPV_PATH
};
static const uint32_t header_spv[] = {
#include "bvh/header.spv.h"
#include HEADER_SPV_PATH
};
static const uint32_t copy_spv[] = {
#include "bvh/copy.spv.h"
#include COPY_SPV_PATH
};
static VkResult