radv: Optimize ray tracing position fetch
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Gets rid of a lot of indirection when fetching triangle positions.
Storing the primitive address increases register pressure by a bit, but
the traversal shader, which should have the highest register demand,
should not be affected when position fetch is not used.

Totals:
Instrs: 4021686 -> 4022435 (+0.02%); split: -0.01%, +0.03%
CodeSize: 21235812 -> 21235832 (+0.00%); split: -0.02%, +0.02%
Latency: 23402275 -> 23412110 (+0.04%); split: -0.04%, +0.09%
InvThroughput: 4352818 -> 4352206 (-0.01%); split: -0.04%, +0.02%
VClause: 101906 -> 102058 (+0.15%); split: -0.03%, +0.18%
Copies: 342210 -> 342368 (+0.05%); split: -0.09%, +0.14%
Branches: 114988 -> 114993 (+0.00%)
PreVGPRs: 26551 -> 27111 (+2.11%)
VALU: 2249366 -> 2249524 (+0.01%); split: -0.01%, +0.02%
SALU: 529828 -> 529808 (-0.00%); split: -0.01%, +0.00%

Reviewed-by: Natalie Vock <natalie.vock@gmx.de>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35533>
This commit is contained in:
Konstantin Seurer 2025-06-14 12:22:06 +02:00 committed by Marge Bot
parent 140c625bda
commit df44b353ad
14 changed files with 73 additions and 115 deletions

View file

@ -214,6 +214,7 @@ struct ac_shader_args {
struct ac_arg accel_struct;
struct ac_arg primitive_id;
struct ac_arg instance_addr;
struct ac_arg primitive_addr;
struct ac_arg geometry_id_and_flags;
struct ac_arg hit_kind;
} rt;

View file

@ -71,7 +71,6 @@ struct radv_accel_struct_header {
/* Everything after this gets updated/copied from the CPU. */
uint32_t geometry_type;
uint32_t geometry_count;
uint32_t primitive_base_indices_offset;
uint64_t instance_offset;
uint64_t instance_count;
uint32_t leaf_node_offsets_offset;
@ -182,11 +181,8 @@ struct radv_gfx12_instance_node_user_data {
uint32_t custom_instance;
uint32_t instance_index;
uint32_t bvh_offset;
uint32_t padding;
uint64_t blas_addr;
uint32_t primitive_base_indices_offset;
uint32_t leaf_node_offsets_offset;
uint32_t unused[12];
uint32_t unused[16];
};
/* Size of the primitive header section in bits. */

View file

@ -50,7 +50,8 @@ main(void)
REF(radv_gfx12_instance_node_user_data)(instance_addr + SIZEOF(radv_gfx12_instance_node));
if (args.mode == RADV_COPY_MODE_SERIALIZE) {
DEREF(INDEX(uint64_t, blas_addrs, i)) = DEREF(instance_data).blas_addr;
DEREF(INDEX(uint64_t, blas_addrs, i)) =
node_to_addr(DEREF(instance_node).pointer_flags_bvh_addr) - DEREF(instance_data).bvh_offset;
} else {
uint32_t bvh_offset = DEREF(instance_data).bvh_offset;
@ -59,7 +60,6 @@ main(void)
uint64_t blas_addr = DEREF(INDEX(uint64_t, blas_addrs, i));
DEREF(instance_node).pointer_flags_bvh_addr =
(pointer_flags_bvh_addr & 0xFFC0000000000000ul) | addr_to_node(blas_addr + bvh_offset);
DEREF(instance_data).blas_addr = blas_addr;
}
}
}

View file

@ -309,8 +309,6 @@ radv_encode_instance_gfx12(VOID_REF dst, vk_ir_instance_node src)
DEREF(user_data).custom_instance = src.custom_instance_and_mask & 0xffffff;
DEREF(user_data).instance_index = src.instance_id;
DEREF(user_data).bvh_offset = blas_header.bvh_offset;
DEREF(user_data).blas_addr = src.base_ptr;
DEREF(user_data).primitive_base_indices_offset = blas_header.primitive_base_indices_offset;
DEREF(user_data).leaf_node_offsets_offset = blas_header.leaf_node_offsets_offset;
}

View file

@ -21,6 +21,7 @@
#define MAX_SCRATCH_STACK_ENTRY_COUNT 76
enum radv_ray_intersection_field {
radv_ray_intersection_primitive_addr,
radv_ray_intersection_primitive_id,
radv_ray_intersection_geometry_id_and_flags,
radv_ray_intersection_instance_addr,
@ -44,6 +45,7 @@ radv_get_intersection_type()
.name = #field_name, \
}
FIELD(primitive_addr, glsl_uint64_t_type());
FIELD(primitive_id, glsl_uint_type());
FIELD(geometry_id_and_flags, glsl_uint_type());
FIELD(instance_addr, glsl_uint64_t_type());
@ -192,6 +194,7 @@ copy_candidate_to_closest(nir_builder *b, nir_deref_instr *rq)
nir_deref_instr *candidate = rq_deref(b, rq, candidate);
isec_copy(b, closest, candidate, barycentrics);
isec_copy(b, closest, candidate, primitive_addr);
isec_copy(b, closest, candidate, geometry_id_and_flags);
isec_copy(b, closest, candidate, instance_addr);
isec_copy(b, closest, candidate, intersection_type);
@ -387,11 +390,8 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
case nir_ray_query_value_world_ray_origin:
return rq_load(b, rq, origin);
case nir_ray_query_value_intersection_triangle_vertex_positions: {
nir_def *instance_node_addr = isec_load(b, intersection, instance_addr);
nir_def *primitive_id = isec_load(b, intersection, primitive_id);
nir_def *geometry_id = nir_iand_imm(b, isec_load(b, intersection, geometry_id_and_flags), 0xFFFFFF);
return radv_load_vertex_position(device, b, instance_node_addr, geometry_id, primitive_id,
nir_intrinsic_column(instr));
nir_def *primitive_addr = isec_load(b, intersection, primitive_addr);
return radv_load_vertex_position(device, b, primitive_addr, nir_intrinsic_column(instr));
}
default:
unreachable("Invalid nir_ray_query_value!");
@ -414,6 +414,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
nir_deref_instr *candidate = rq_deref(b, data->rq, candidate);
isec_store(b, candidate, primitive_addr, intersection->node_addr);
isec_store(b, candidate, primitive_id, intersection->primitive_id);
isec_store(b, candidate, geometry_id_and_flags, intersection->geometry_id_and_flags);
isec_store(b, candidate, opaque, intersection->opaque);
@ -434,6 +435,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
nir_deref_instr *candidate = rq_deref(b, data->rq, candidate);
isec_store(b, candidate, barycentrics, intersection->barycentrics);
isec_store(b, candidate, primitive_addr, intersection->base.node_addr);
isec_store(b, candidate, primitive_id, intersection->base.primitive_id);
isec_store(b, candidate, geometry_id_and_flags, intersection->base.geometry_id_and_flags);
isec_store(b, candidate, t, intersection->t);

View file

@ -357,42 +357,17 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool tr
}
nir_def *
radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def *geometry_id,
nir_def *primitive_id, uint32_t index)
radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *primitive_addr, uint32_t index)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (radv_use_bvh8(pdev)) {
nir_def *addr_offsets =
nir_build_load_global(b, 4, 32,
nir_iadd_imm(b, instance_addr,
sizeof(struct radv_gfx12_instance_node) +
offsetof(struct radv_gfx12_instance_node_user_data, blas_addr)));
nir_def *bvh_offset =
nir_build_load_global(b, 1, 32,
nir_iadd_imm(b, instance_addr,
sizeof(struct radv_gfx12_instance_node) +
offsetof(struct radv_gfx12_instance_node_user_data, bvh_offset)));
nir_def *addr = nir_pack_64_2x32(b, nir_channels(b, addr_offsets, 0x3));
nir_def *base_index_offset =
nir_iadd(b, nir_channel(b, addr_offsets, 2), nir_imul_imm(b, geometry_id, sizeof(uint32_t)));
nir_def *base_index = nir_build_load_global(b, 1, 32, nir_iadd(b, addr, nir_u2u64(b, base_index_offset)));
nir_def *offset_offset = nir_iadd(b, nir_channel(b, addr_offsets, 3),
nir_imul_imm(b, nir_iadd(b, base_index, primitive_id), sizeof(uint32_t)));
nir_def *offset = nir_build_load_global(b, 1, 32, nir_iadd(b, addr, nir_u2u64(b, offset_offset)));
offset = nir_iadd(b, offset, bvh_offset);
/* Assume that vertices are uncompressed. */
offset = nir_iadd_imm(b, offset,
ROUND_DOWN_TO(RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE / 8, 4) + index * 3 * sizeof(float));
uint32_t offset = ROUND_DOWN_TO(RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE / 8, 4) + index * 3 * sizeof(float);
nir_def *data[4];
for (uint32_t i = 0; i < ARRAY_SIZE(data); i++) {
data[i] = nir_build_load_global(b, 1, 32, nir_iadd(b, addr, nir_u2u64(b, offset)));
offset = nir_iadd_imm(b, offset, 4);
data[i] = nir_build_load_global(b, 1, 32, nir_iadd_imm(b, primitive_addr, offset));
offset += 4;
}
uint32_t subdword_offset = RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE % 32;
@ -407,23 +382,8 @@ radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *i
return nir_vec3(b, vertices[0], vertices[1], vertices[2]);
}
nir_def *bvh_addr_id =
nir_build_load_global(b, 1, 64, nir_iadd_imm(b, instance_addr, offsetof(struct radv_bvh_instance_node, bvh_ptr)));
nir_def *bvh_addr = build_node_to_addr(device, b, bvh_addr_id, true);
nir_def *bvh_offset = nir_build_load_global(
b, 1, 32, nir_iadd_imm(b, instance_addr, offsetof(struct radv_bvh_instance_node, bvh_offset)));
nir_def *accel_struct = nir_isub(b, bvh_addr, nir_u2u64(b, bvh_offset));
nir_def *base_indices_offset = nir_build_load_global(
b, 1, 32,
nir_iadd_imm(b, accel_struct, offsetof(struct radv_accel_struct_header, primitive_base_indices_offset)));
nir_def *base_index_offset = nir_iadd(b, base_indices_offset, nir_imul_imm(b, geometry_id, sizeof(uint32_t)));
nir_def *base_index = nir_build_load_global(b, 1, 32, nir_iadd(b, accel_struct, nir_u2u64(b, base_index_offset)));
nir_def *offset = nir_imul_imm(b, nir_iadd(b, base_index, primitive_id), sizeof(struct radv_bvh_triangle_node));
offset = nir_iadd_imm(b, offset, sizeof(struct radv_bvh_box32_node) + index * 3 * sizeof(float));
return nir_build_load_global(b, 3, 32, nir_iadd(b, bvh_addr, nir_u2u64(b, offset)));
uint32_t offset = index * 3 * sizeof(float);
return nir_build_load_global(b, 3, 32, nir_iadd_imm(b, primitive_addr, offset));
}
void

View file

@ -24,8 +24,7 @@ nir_def *build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def
nir_def *nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation);
nir_def *radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *instance_addr,
nir_def *geometry_id, nir_def *primitive_id, uint32_t index);
nir_def *radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *primitive_addr, uint32_t index);
void radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out);

View file

@ -202,6 +202,7 @@ struct rt_variables {
nir_variable *tmax;
/* Properties of the primitive currently being visited. */
nir_variable *primitive_addr;
nir_variable *primitive_id;
nir_variable *geometry_id_and_flags;
nir_variable *instance_addr;
@ -253,6 +254,7 @@ create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipe
vars.direction = nir_variable_create(shader, nir_var_shader_temp, vec3_type, "ray_direction");
vars.tmax = nir_variable_create(shader, nir_var_shader_temp, glsl_float_type(), "ray_tmax");
vars.primitive_addr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "primitive_addr");
vars.primitive_id = nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "primitive_id");
vars.geometry_id_and_flags =
nir_variable_create(shader, nir_var_shader_temp, glsl_uint_type(), "geometry_id_and_flags");
@ -299,6 +301,7 @@ map_rt_variables(struct hash_table *var_remap, struct rt_variables *src, const s
_mesa_hash_table_insert(var_remap, src->direction, dst->direction);
_mesa_hash_table_insert(var_remap, src->tmax, dst->tmax);
_mesa_hash_table_insert(var_remap, src->primitive_addr, dst->primitive_addr);
_mesa_hash_table_insert(var_remap, src->primitive_id, dst->primitive_id);
_mesa_hash_table_insert(var_remap, src->geometry_id_and_flags, dst->geometry_id_and_flags);
_mesa_hash_table_insert(var_remap, src->instance_addr, dst->instance_addr);
@ -322,6 +325,8 @@ create_inner_vars(nir_builder *b, const struct rt_variables *vars)
inner_vars.idx = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_idx");
inner_vars.shader_record_ptr =
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_shader_record_ptr");
inner_vars.primitive_addr =
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint64_t_type(), "inner_primitive_addr");
inner_vars.primitive_id =
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "inner_primitive_id");
inner_vars.geometry_id_and_flags =
@ -649,10 +654,11 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
}
case nir_intrinsic_execute_closest_hit_amd: {
nir_store_var(b, vars->tmax, intr->src[1].ssa, 0x1);
nir_store_var(b, vars->primitive_id, intr->src[2].ssa, 0x1);
nir_store_var(b, vars->instance_addr, intr->src[3].ssa, 0x1);
nir_store_var(b, vars->geometry_id_and_flags, intr->src[4].ssa, 0x1);
nir_store_var(b, vars->hit_kind, intr->src[5].ssa, 0x1);
nir_store_var(b, vars->primitive_addr, intr->src[2].ssa, 0x1);
nir_store_var(b, vars->primitive_id, intr->src[3].ssa, 0x1);
nir_store_var(b, vars->instance_addr, intr->src[4].ssa, 0x1);
nir_store_var(b, vars->geometry_id_and_flags, intr->src[5].ssa, 0x1);
nir_store_var(b, vars->hit_kind, intr->src[6].ssa, 0x1);
load_sbt_entry(b, vars, intr->src[0].ssa, SBT_HIT, SBT_RECURSIVE_PTR);
nir_def *should_return =
@ -689,11 +695,8 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
break;
}
case nir_intrinsic_load_ray_triangle_vertex_positions: {
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
nir_def *primitive_id = nir_load_var(b, vars->primitive_id);
nir_def *geometry_id = nir_iand_imm(b, nir_load_var(b, vars->geometry_id_and_flags), 0xFFFFFFF);
ret = radv_load_vertex_position(vars->device, b, instance_node_addr, geometry_id, primitive_id,
nir_intrinsic_column(intr));
nir_def *primitive_addr = nir_load_var(b, vars->primitive_addr);
ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr));
break;
}
default:
@ -1385,6 +1388,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
{
struct rt_variables inner_vars = create_inner_vars(b, data->vars);
nir_store_var(b, inner_vars.primitive_addr, intersection->base.node_addr, 1);
nir_store_var(b, inner_vars.primitive_id, intersection->base.primitive_id, 1);
nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1);
nir_store_var(b, inner_vars.tmax, intersection->t, 0x1);
@ -1418,6 +1422,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
nir_push_if(b, nir_load_var(b, data->vars->ahit_accept));
{
nir_store_var(b, data->vars->primitive_addr, intersection->base.node_addr, 1);
nir_store_var(b, data->vars->primitive_id, intersection->base.primitive_id, 1);
nir_store_var(b, data->vars->geometry_id_and_flags, intersection->base.geometry_id_and_flags, 1);
nir_store_var(b, data->vars->tmax, intersection->t, 0x1);
@ -1452,6 +1457,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
* next closest hit candidate. */
inner_vars.hit_kind = data->vars->hit_kind;
nir_store_var(b, inner_vars.primitive_addr, intersection->node_addr, 1);
nir_store_var(b, inner_vars.primitive_id, intersection->primitive_id, 1);
nir_store_var(b, inner_vars.geometry_id_and_flags, intersection->geometry_id_and_flags, 1);
nir_store_var(b, inner_vars.tmax, nir_load_var(b, data->vars->tmax), 0x1);
@ -1481,6 +1487,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
nir_push_if(b, nir_load_var(b, data->vars->ahit_accept));
{
nir_store_var(b, data->vars->primitive_addr, intersection->node_addr, 1);
nir_store_var(b, data->vars->primitive_id, intersection->primitive_id, 1);
nir_store_var(b, data->vars->geometry_id_and_flags, intersection->geometry_id_and_flags, 1);
nir_store_var(b, data->vars->tmax, nir_load_var(b, inner_vars.tmax), 0x1);
@ -1673,7 +1680,14 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
} else {
for (int i = 0; i < ARRAY_SIZE(hit_attribs); ++i)
nir_store_hit_attrib_amd(b, nir_load_var(b, hit_attribs[i]), .base = i);
nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax),
nir_def *primitive_addr;
if (info->has_position_fetch)
primitive_addr = nir_load_var(b, vars->primitive_addr);
else
primitive_addr = nir_undef(b, 1, 64);
nir_execute_closest_hit_amd(b, nir_load_var(b, vars->idx), nir_load_var(b, vars->tmax), primitive_addr,
nir_load_var(b, vars->primitive_id), nir_load_var(b, vars->instance_addr),
nir_load_var(b, vars->geometry_id_and_flags), nir_load_var(b, vars->hit_kind));
}
@ -1920,7 +1934,8 @@ void
radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const struct radv_shader_args *args, const struct radv_shader_info *info, uint32_t *stack_size,
bool resume_shader, struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
bool monolithic, const struct radv_ray_tracing_stage_info *traversal_info)
bool monolithic, bool has_position_fetch,
const struct radv_ray_tracing_stage_info *traversal_info)
{
nir_function_impl *impl = nir_shader_get_entrypoint(shader);
@ -1999,6 +2014,8 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
else
nir_store_var(&b, vars.miss_index, ac_nir_load_arg(&b, &args->ac, args->ac.rt.miss_index), 0x1);
nir_def *primitive_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.primitive_addr);
nir_store_var(&b, vars.primitive_addr, nir_pack_64_2x32(&b, primitive_addr), 1);
nir_store_var(&b, vars.primitive_id, ac_nir_load_arg(&b, &args->ac, args->ac.rt.primitive_id), 1);
nir_def *instance_addr = ac_nir_load_arg(&b, &args->ac, args->ac.rt.instance_addr);
nir_store_var(&b, vars.instance_addr, nir_pack_64_2x32(&b, instance_addr), 1);
@ -2071,6 +2088,9 @@ radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKH
radv_store_arg(&b, args, traversal_info, args->ac.rt.ray_direction, nir_load_var(&b, vars.direction));
radv_store_arg(&b, args, traversal_info, args->ac.rt.ray_tmax, nir_load_var(&b, vars.tmax));
if (has_position_fetch)
radv_store_arg(&b, args, traversal_info, args->ac.rt.primitive_addr, nir_load_var(&b, vars.primitive_addr));
radv_store_arg(&b, args, traversal_info, args->ac.rt.primitive_id, nir_load_var(&b, vars.primitive_id));
radv_store_arg(&b, args, traversal_info, args->ac.rt.instance_addr, nir_load_var(&b, vars.instance_addr));
radv_store_arg(&b, args, traversal_info, args->ac.rt.geometry_id_and_flags,

View file

@ -50,7 +50,6 @@ static const uint32_t leaf_spv[] = {
struct acceleration_structure_layout {
uint32_t geometry_info_offset;
uint32_t primitive_base_indices_offset;
uint32_t leaf_node_offsets_offset;
uint32_t bvh_offset;
uint32_t leaf_nodes_offset;
@ -126,11 +125,6 @@ radv_get_acceleration_structure_layout(struct radv_device *device,
offset += sizeof(struct radv_accel_struct_geometry_info) * state->build_info->geometryCount;
}
if (device->vk.enabled_features.rayTracingPositionFetch && geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
accel_struct->primitive_base_indices_offset = offset;
offset += sizeof(uint32_t) * state->build_info->geometryCount;
}
/* On GFX12, we need additional space for leaf node offsets since they do not have the same
* order as the application provided data.
*/
@ -667,7 +661,6 @@ radv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_str
header.build_flags = state->build_info->flags;
header.geometry_type = vk_get_as_geometry_type(state->build_info);
header.geometry_count = state->build_info->geometryCount;
header.primitive_base_indices_offset = layout.primitive_base_indices_offset;
radv_update_memory_cp(cmd_buffer, vk_acceleration_structure_get_va(dst) + base, (const char *)&header + base,
sizeof(header) - base);
@ -692,27 +685,6 @@ radv_init_header(VkCommandBuffer commandBuffer, const struct vk_acceleration_str
free(geometry_infos);
}
VkGeometryTypeKHR geometry_type = vk_get_as_geometry_type(state->build_info);
if (device->vk.enabled_features.rayTracingPositionFetch && geometry_type == VK_GEOMETRY_TYPE_TRIANGLES_KHR) {
uint32_t base_indices_size = sizeof(uint32_t) * state->build_info->geometryCount;
uint32_t *base_indices = malloc(base_indices_size);
if (!base_indices) {
vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
return;
}
uint32_t base_index = 0;
for (uint32_t i = 0; i < state->build_info->geometryCount; i++) {
base_indices[i] = base_index;
base_index += state->build_range_infos[i].primitiveCount;
}
radv_CmdUpdateBuffer(commandBuffer, vk_buffer_to_handle(dst->buffer),
dst->offset + layout.primitive_base_indices_offset, base_indices_size, base_indices);
free(base_indices);
}
}
static void

View file

@ -364,7 +364,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
struct radv_ray_tracing_stage_info *stage_info,
const struct radv_ray_tracing_stage_info *traversal_stage_info,
struct radv_serialized_shader_arena_block *replay_block, bool skip_shaders_cache,
struct radv_shader **out_shader)
bool has_position_fetch, struct radv_shader **out_shader)
{
struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
@ -426,7 +426,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
struct radv_shader_stage temp_stage = *stage;
temp_stage.nir = shaders[i];
radv_nir_lower_rt_abi(temp_stage.nir, pCreateInfo, &temp_stage.args, &stage->info, stack_size, i > 0, device,
pipeline, monolithic, traversal_stage_info);
pipeline, monolithic, has_position_fetch, traversal_stage_info);
/* Info might be out-of-date after inlining in radv_nir_lower_rt_abi(). */
nir_shader_gather_info(temp_stage.nir, nir_shader_get_entrypoint(temp_stage.nir));
@ -547,6 +547,9 @@ radv_gather_ray_tracing_stage_info(nir_shader *nir)
continue;
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
if (intr->intrinsic == nir_intrinsic_load_ray_triangle_vertex_positions)
info.has_position_fetch = true;
if (intr->intrinsic != nir_intrinsic_trace_ray)
continue;
@ -632,10 +635,13 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
}
bool has_callable = false;
/* Libraries cannot know how they are used so we need to assume that position fetch is used. */
bool has_position_fetch = library;
/* TODO: Recompile recursive raygen shaders instead. */
bool raygen_imported = false;
for (uint32_t i = 0; i < pipeline->stage_count; i++) {
has_callable |= rt_stages[i].stage == MESA_SHADER_CALLABLE;
has_position_fetch |= rt_stages[i].info.has_position_fetch;
monolithic &= rt_stages[i].info.can_inline;
if (i >= pCreateInfo->stageCount)
@ -691,9 +697,9 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
bool monolithic_raygen = monolithic && stage->stage == MESA_SHADER_RAYGEN;
result =
radv_rt_nir_to_asm(device, cache, pCreateInfo, pipeline, monolithic_raygen, stage, &stack_size,
&rt_stages[idx].info, NULL, replay_block, skip_shaders_cache, &rt_stages[idx].shader);
result = radv_rt_nir_to_asm(device, cache, pCreateInfo, pipeline, monolithic_raygen, stage, &stack_size,
&rt_stages[idx].info, NULL, replay_block, skip_shaders_cache, has_position_fetch,
&rt_stages[idx].shader);
if (result != VK_SUCCESS)
goto cleanup;
@ -720,6 +726,7 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
struct radv_ray_tracing_stage_info traversal_info = {
.set_flags = 0xFFFFFFFF,
.unset_flags = 0xFFFFFFFF,
.has_position_fetch = has_position_fetch,
};
memset(traversal_info.unused_args, 0xFF, sizeof(traversal_info.unused_args));
@ -750,9 +757,9 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
.key = stage_keys[MESA_SHADER_INTERSECTION],
};
radv_shader_layout_init(pipeline_layout, MESA_SHADER_INTERSECTION, &traversal_stage.layout);
result =
radv_rt_nir_to_asm(device, cache, pCreateInfo, pipeline, false, &traversal_stage, NULL, NULL, &traversal_info,
NULL, skip_shaders_cache, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
result = radv_rt_nir_to_asm(device, cache, pCreateInfo, pipeline, false, &traversal_stage, NULL, NULL,
&traversal_info, NULL, skip_shaders_cache, has_position_fetch,
&pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]);
ralloc_free(traversal_nir);
cleanup:

View file

@ -74,6 +74,7 @@ struct radv_rt_const_arg_info {
struct radv_ray_tracing_stage_info {
bool can_inline;
bool has_position_fetch;
BITSET_DECLARE(unused_args, AC_MAX_ARGS);

View file

@ -526,7 +526,7 @@ struct radv_ray_tracing_stage_info;
void radv_nir_lower_rt_abi(nir_shader *shader, const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
const struct radv_shader_args *args, const struct radv_shader_info *info,
uint32_t *stack_size, bool resume_shader, struct radv_device *device,
struct radv_ray_tracing_pipeline *pipeline, bool monolithic,
struct radv_ray_tracing_pipeline *pipeline, bool monolithic, bool has_position_fetch,
const struct radv_ray_tracing_stage_info *traversal_info);
void radv_gather_unused_args(struct radv_ray_tracing_stage_info *info, nir_shader *nir);

View file

@ -352,6 +352,7 @@ radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_arg
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.miss_index);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.instance_addr);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_CONST_PTR, &args->ac.rt.primitive_addr);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.primitive_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.geometry_id_and_flags);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.rt.hit_kind);

View file

@ -1897,11 +1897,12 @@ system_value("cull_mask_and_flags_amd", 1)
# 0. SBT Index
# 1. Ray Tmax
# 2. Primitive Id
# 3. Instance Addr
# 4. Geometry Id and Flags
# 5. Hit Kind
intrinsic("execute_closest_hit_amd", src_comp=[1, 1, 1, 1, 1, 1])
# 2. Primitive Addr
# 3. Primitive Id
# 4. Instance Addr
# 5. Geometry Id and Flags
# 6. Hit Kind
intrinsic("execute_closest_hit_amd", src_comp=[1, 1, 1, 1, 1, 1, 1])
# 0. Ray Tmax
intrinsic("execute_miss_amd", src_comp=[1])