radv: use radv_compiler_info everywhere during compilation

This prevents the compiler from accessing the logical/physical devices
and the instance during compilation.

The main goal is to make it more robust against cache related issues
when something isn't hashed correctly (this used to happen a lot in the
past). Also it would be much more robust for sharing binaries between
two GPUs in the same generation (e.g. Vangogh/Rembrandt) because
everything needed for compilation is in radv_compiler_info. There is
still some work to do to achieve that but it's making good progress.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40992>
This commit is contained in:
Samuel Pitoiset 2026-02-25 11:40:13 +01:00 committed by Marge Bot
parent 4a91fd8bab
commit 371316e989
39 changed files with 831 additions and 929 deletions

View file

@ -17,17 +17,17 @@ extern "C" {
#endif
typedef struct nir_shader nir_shader;
struct radeon_info;
struct radv_shader_stage;
struct radv_shader_info;
struct radv_shader_args;
struct radv_shader_layout;
struct radv_device;
struct radv_graphics_state_key;
struct radv_ps_epilog_key;
struct radv_debug_nir;
struct radv_compiler_info;
bool radv_nir_lower_descriptors(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage);
bool radv_nir_lower_descriptors(nir_shader *shader, const struct radv_compiler_info *compiler_info,
const struct radv_shader_stage *stage);
bool radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_stage *stage,
const struct radv_graphics_state_key *gfx_state, uint32_t address32_hi);
@ -36,10 +36,11 @@ bool radv_nir_lower_hit_attrib_derefs(nir_shader *shader);
bool radv_nir_lower_ray_payload_derefs(nir_shader *shader, uint32_t offset);
bool radv_nir_lower_ray_queries(nir_shader *shader, struct radv_device *device);
bool radv_nir_lower_ray_queries(nir_shader *shader, const struct radv_compiler_info *compiler_info);
bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_stage,
const struct radv_graphics_state_key *gfx_state, const struct radeon_info *gpu_info);
bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_compiler_info *compiler_info,
const struct radv_shader_stage *vs_stage,
const struct radv_graphics_state_key *gfx_state);
bool radv_nir_optimize_vs_inputs_to_const(nir_shader *shader, const struct radv_graphics_state_key *gfx_state);
@ -63,7 +64,7 @@ unsigned radv_map_io_driver_location(unsigned semantic);
void radv_nir_lower_io(nir_shader *nir);
bool radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage);
bool radv_nir_lower_io_to_mem(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage);
bool radv_nir_lower_cooperative_matrix(nir_shader *shader, enum amd_gfx_level gfx_level, unsigned wave_size);
@ -95,7 +96,7 @@ bool radv_nir_opt_tid_function(nir_shader *shader, const radv_nir_opt_tid_functi
bool radv_nir_opt_fs_builtins(nir_shader *shader, const struct radv_graphics_state_key *gfx_state,
unsigned vgt_outprim_type);
bool radv_nir_lower_immediate_samplers(nir_shader *shader, struct radv_device *device,
bool radv_nir_lower_immediate_samplers(nir_shader *shader, const struct radv_compiler_info *compiler_info,
const struct radv_shader_stage *stage);
void radv_nir_lower_callee_signature(nir_function *function);

View file

@ -10,9 +10,7 @@
#include "nir_builder.h"
#include "radv_descriptor_set.h"
#include "radv_descriptors.h"
#include "radv_device.h"
#include "radv_nir.h"
#include "radv_physical_device.h"
#include "radv_shader.h"
#include "radv_shader_args.h"
#include "sid.h"
@ -676,21 +674,22 @@ lower_descriptors_tex(nir_builder *b, lower_descriptors_state *state, nir_tex_in
}
bool
radv_nir_lower_descriptors(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
radv_nir_lower_descriptors(nir_shader *shader, const struct radv_compiler_info *compiler_info,
const struct radv_shader_stage *stage)
{
bool progress = false;
const struct radv_physical_device *pdev = radv_device_physical(device);
lower_descriptors_state state = {
.gfx_level = pdev->info.gfx_level,
.address32_hi = pdev->info.address32_hi,
.sampled_image_desc_size = radv_get_sampled_image_desc_size(pdev),
.combined_image_sampler_desc_size = radv_get_combined_image_sampler_desc_size(pdev),
.combined_image_sampler_offset = radv_get_combined_image_sampler_offset(pdev),
.disable_aniso_single_level = pdev->cache_key.disable_aniso_single_level,
.has_image_load_dcc_bug = pdev->info.compiler_info.has_image_load_dcc_bug,
.gfx_level = compiler_info->ac->gfx_level,
.address32_hi = compiler_info->hw.address32_hi,
.sampled_image_desc_size = compiler_info->sampled_image_desc_size,
.combined_image_sampler_desc_size = compiler_info->combined_image_sampler_desc_size,
.combined_image_sampler_offset = compiler_info->combined_image_sampler_offset,
.disable_aniso_single_level =
compiler_info->cache_key->disable_aniso_single_level && compiler_info->ac->gfx_level < GFX8,
.has_image_load_dcc_bug = compiler_info->ac->has_image_load_dcc_bug,
.disable_tg4_trunc_coord =
!pdev->info.compiler_info.conformant_trunc_coord && !pdev->cache_key.disable_trunc_coord,
!compiler_info->ac->conformant_trunc_coord && !compiler_info->cache_key->disable_trunc_coord,
.args = &stage->args,
.info = &stage->info,
.layout = &stage->layout,

View file

@ -7,7 +7,6 @@
#include "nir.h"
#include "nir_builder.h"
#include "radv_descriptor_set.h"
#include "radv_device.h"
#include "radv_nir.h"
#include "radv_physical_device.h"
#include "radv_sampler.h"
@ -19,8 +18,7 @@
* forbidden).
*/
typedef struct {
const struct radv_device *device;
bool disable_tg4_trunc_coord;
const struct radv_compiler_info *compiler_info;
const struct radv_shader_layout *layout;
} lower_immediate_samplers_state;
@ -31,8 +29,10 @@ lower_immediate_samplers(nir_builder *b, nir_tex_instr *tex, void *cb_data)
b->cursor = nir_before_instr(&tex->instr);
const bool disable_tg4_trunc_coord =
!state->compiler_info->ac->conformant_trunc_coord && !state->compiler_info->cache_key->disable_trunc_coord;
const uint32_t dword0_mask =
tex->op == nir_texop_tg4 && state->disable_tg4_trunc_coord ? C_008F30_TRUNC_COORD : 0xffffffffu;
tex->op == nir_texop_tg4 && disable_tg4_trunc_coord ? C_008F30_TRUNC_COORD : 0xffffffffu;
if (tex->embedded_sampler) {
const struct vk_sampler_state_array *embedded_samplers = &state->layout->embedded_samplers;
@ -42,7 +42,7 @@ lower_immediate_samplers(nir_builder *b, nir_tex_instr *tex, void *cb_data)
assert(sampler_idx < embedded_samplers->sampler_count);
const struct vk_sampler_state *sampler_state = &embedded_samplers->samplers[sampler_idx];
radv_make_sampler_descriptor(state->device, sampler_state, desc);
radv_make_sampler_descriptor(state->compiler_info, sampler_state, desc);
nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle,
nir_imm_ivec4(b, desc[0] & dword0_mask, desc[1], desc[2], desc[3]));
@ -89,14 +89,11 @@ lower_immediate_samplers(nir_builder *b, nir_tex_instr *tex, void *cb_data)
}
bool
radv_nir_lower_immediate_samplers(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
radv_nir_lower_immediate_samplers(nir_shader *shader, const struct radv_compiler_info *compiler_info,
const struct radv_shader_stage *stage)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
lower_immediate_samplers_state state = {
.device = device,
.disable_tg4_trunc_coord =
!pdev->info.compiler_info.conformant_trunc_coord && !pdev->cache_key.disable_trunc_coord,
.compiler_info = compiler_info,
.layout = &stage->layout,
};

View file

@ -10,9 +10,7 @@
#include "nir.h"
#include "nir_builder.h"
#include "nir_tcs_info.h"
#include "radv_device.h"
#include "radv_nir.h"
#include "radv_physical_device.h"
#include "radv_shader.h"
static int
@ -85,9 +83,8 @@ radv_map_io_driver_location(unsigned semantic)
}
bool
radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage)
radv_nir_lower_io_to_mem(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_shader_info *info = &stage->info;
ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location;
ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
@ -95,16 +92,16 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
if (nir->info.stage == MESA_SHADER_VERTEX) {
if (info->vs.as_ls) {
NIR_PASS(_, nir, ac_nir_lower_ls_outputs_to_mem, map_output, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
NIR_PASS(_, nir, ac_nir_lower_ls_outputs_to_mem, map_output, compiler_info->ac->gfx_level,
info->vs.tcs_in_out_eq, info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
return true;
} else if (info->vs.as_es) {
NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, compiler_info->ac->gfx_level, info->esgs_itemsize,
info->gs_inputs_read);
return true;
}
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
NIR_PASS(_, nir, ac_nir_lower_hs_inputs_to_mem, map_input, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
NIR_PASS(_, nir, ac_nir_lower_hs_inputs_to_mem, map_input, compiler_info->ac->gfx_level, info->vs.tcs_in_out_eq,
info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
nir_tcs_info tcs_info;
@ -113,21 +110,21 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
ac_nir_get_tess_io_info(nir, &tcs_info, info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, map_output,
true, &tess_io_info);
NIR_PASS(_, nir, ac_nir_lower_hs_outputs_to_mem, &tcs_info, &tess_io_info, map_output, pdev->info.gfx_level,
info->wave_size);
NIR_PASS(_, nir, ac_nir_lower_hs_outputs_to_mem, &tcs_info, &tess_io_info, map_output,
compiler_info->ac->gfx_level, info->wave_size);
return true;
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS(_, nir, ac_nir_lower_tes_inputs_to_mem, map_input);
if (info->tes.as_es) {
NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, compiler_info->ac->gfx_level, info->esgs_itemsize,
info->gs_inputs_read);
}
return true;
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
NIR_PASS(_, nir, ac_nir_lower_gs_inputs_to_mem, map_input, pdev->info.gfx_level, false);
NIR_PASS(_, nir, ac_nir_lower_gs_inputs_to_mem, map_input, compiler_info->ac->gfx_level, false);
return true;
} else if (nir->info.stage == MESA_SHADER_TASK) {
ac_nir_lower_task_outputs_to_mem(nir, info->cs.has_query);

View file

@ -154,7 +154,7 @@ struct ray_query_vars {
static void
init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray_query_vars *dst, const char *base_name,
const struct radv_physical_device *pdev)
const struct radv_compiler_info *compiler_info)
{
memset(dst, 0, sizeof(*dst));
@ -162,23 +162,23 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
shader->info.workgroup_size[0] * shader->info.workgroup_size[1] * shader->info.workgroup_size[2];
uint32_t shared_stack_entries = shader->info.ray_queries == 1 ? 16 : 8;
/* ds_bvh_stack* instructions use a fixed stride of 32 dwords. */
if (radv_use_bvh_stack_rtn(pdev))
if (radv_use_bvh_stack_rtn(compiler_info))
workgroup_size = align(workgroup_size, 32);
uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4;
uint32_t shared_offset = align(shader->info.shared_size, 4);
if (shader->info.stage != MESA_SHADER_COMPUTE || glsl_type_is_array(opaque_type) ||
shared_offset + shared_stack_size > pdev->info.lds_size_per_workgroup) {
shared_offset + shared_stack_size > compiler_info->hw.lds_size_per_workgroup) {
dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT;
} else {
if (radv_use_bvh_stack_rtn(pdev)) {
if (radv_use_bvh_stack_rtn(compiler_info)) {
/* The hardware ds_bvh_stack_rtn address can only encode a stack base up to 8191 dwords, or 16383 dwords on
* gfx12+.
*/
uint32_t num_wave32_groups = workgroup_size / 32;
uint32_t max_group_stack_base = (num_wave32_groups - 1) * 32 * shared_stack_entries;
uint32_t max_stack_base = (shared_offset / 4) + max_group_stack_base;
uint32_t max_hw_stack_base = pdev->info.gfx_level >= GFX12 ? 16384 : 8192;
uint32_t max_hw_stack_base = compiler_info->ac->gfx_level >= GFX12 ? 16384 : 8192;
dst->use_bvh_stack_rtn = max_stack_base < max_hw_stack_base;
}
dst->shared_stack = true;
@ -194,11 +194,11 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
static void
lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *ht,
const struct radv_physical_device *pdev)
const struct radv_compiler_info *compiler_info)
{
struct ray_query_vars *vars = ralloc(ht, struct ray_query_vars);
init_ray_query_vars(shader, ray_query->type, vars, ray_query->name == NULL ? "" : ray_query->name, pdev);
init_ray_query_vars(shader, ray_query->type, vars, ray_query->name == NULL ? "" : ray_query->name, compiler_info);
_mesa_hash_table_insert(ht, ray_query, vars);
}
@ -266,10 +266,8 @@ enum rq_intersection_type { intersection_type_none, intersection_type_triangle,
static void
lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_vars *vars, nir_deref_instr *rq,
struct radv_device *device)
const struct radv_compiler_info *compiler_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_deref_instr *closest = rq_deref(b, rq, closest);
nir_deref_instr *candidate = rq_deref(b, rq, candidate);
@ -307,7 +305,7 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
bvh_offset = nir_if_phi(b, bvh_offset, zero);
nir_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
bvh_base = build_addr_to_node(device, b, bvh_base, instr->src[2].ssa);
bvh_base = build_addr_to_node(compiler_info, b, bvh_base, instr->src[2].ssa);
rq_store(b, rq, root_bvh_base, bvh_base);
rq_store(b, rq, trav_bvh_base, bvh_base);
@ -317,8 +315,8 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
if (vars->use_bvh_stack_rtn) {
uint32_t workgroup_size =
b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2];
nir_def *addr =
radv_build_bvh_stack_rtn_addr(b, stack_idx, pdev, workgroup_size, vars->shared_base, vars->stack_entries);
nir_def *addr = radv_build_bvh_stack_rtn_addr(b, stack_idx, compiler_info, workgroup_size, vars->shared_base,
vars->stack_entries);
rq_store(b, rq, trav_stack, addr);
rq_store(b, rq, trav_stack_low_watermark, addr);
} else {
@ -340,13 +338,14 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
rq_store(b, rq, trav_top_stack, nir_imm_int(b, -1));
rq_store(b, rq, incomplete, nir_iand_imm(b, accel_struct_non_null, !pdev->cache_key.no_rt));
rq_store(b, rq, incomplete, nir_iand_imm(b, accel_struct_non_null, !compiler_info->cache_key->no_rt));
vars->initialize = instr;
}
static nir_def *
lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *instr, nir_deref_instr *rq)
lower_rq_load(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_intrinsic_instr *instr,
nir_deref_instr *rq)
{
bool committed = nir_intrinsic_committed(instr);
@ -369,24 +368,24 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
case nir_ray_query_value_intersection_geometry_index:
return nir_iand_imm(b, isec_load(b, intersection, geometry_id_and_flags), 0xFFFFFF);
case nir_ray_query_value_intersection_instance_custom_index:
return radv_load_custom_instance(device, b, isec_load(b, intersection, instance_addr));
return radv_load_custom_instance(compiler_info, b, isec_load(b, intersection, instance_addr));
case nir_ray_query_value_intersection_instance_id:
return radv_load_instance_id(device, b, isec_load(b, intersection, instance_addr));
return radv_load_instance_id(compiler_info, b, isec_load(b, intersection, instance_addr));
case nir_ray_query_value_intersection_instance_sbt_index:
return nir_iand_imm(b, isec_load(b, intersection, sbt_offset_and_flags), 0xFFFFFF);
case nir_ray_query_value_intersection_object_ray_direction: {
nir_def *wto_matrix[3];
radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
radv_load_wto_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), wto_matrix);
return nir_build_vec3_mat_mult(b, rq_load(b, rq, direction), wto_matrix, false);
}
case nir_ray_query_value_intersection_object_ray_origin: {
nir_def *wto_matrix[3];
radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
radv_load_wto_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), wto_matrix);
return nir_build_vec3_mat_mult(b, rq_load(b, rq, origin), wto_matrix, true);
}
case nir_ray_query_value_intersection_object_to_world: {
nir_def *otw_matrix[3];
radv_load_otw_matrix(device, b, isec_load(b, intersection, instance_addr), otw_matrix);
radv_load_otw_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), otw_matrix);
return nir_vec3(b, nir_channel(b, otw_matrix[0], column), nir_channel(b, otw_matrix[1], column),
nir_channel(b, otw_matrix[2], column));
}
@ -403,7 +402,7 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
}
case nir_ray_query_value_intersection_world_to_object: {
nir_def *wto_matrix[3];
radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
radv_load_wto_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), wto_matrix);
nir_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
@ -419,7 +418,7 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
return rq_load(b, rq, origin);
case nir_ray_query_value_intersection_triangle_vertex_positions: {
nir_def *primitive_addr = isec_load(b, intersection, primitive_addr);
return radv_load_vertex_position(device, b, primitive_addr, nir_intrinsic_column(instr));
return radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(instr));
}
default:
UNREACHABLE("Invalid nir_ray_query_value!");
@ -429,7 +428,6 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
}
struct traversal_data {
const struct radv_device *device;
struct ray_query_vars *vars;
nir_deref_instr *rq;
};
@ -510,10 +508,8 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal
static nir_def *
lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_vars *vars, nir_deref_instr *rq,
struct radv_device *device)
const struct radv_compiler_info *compiler_info)
{
struct radv_physical_device *pdev = radv_device_physical(device);
nir_deref_instr *closest = rq_deref(b, rq, closest);
nir_deref_instr *candidate = rq_deref(b, rq, candidate);
@ -550,7 +546,6 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
};
struct traversal_data data = {
.device = device,
.vars = vars,
.rq = rq,
};
@ -590,10 +585,10 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
nir_push_if(b, rq_load(b, rq, incomplete));
{
nir_def *incomplete;
if (pdev->cache_key.bvh8)
incomplete = radv_build_ray_traversal_gfx12(device, b, &args);
if (compiler_info->cache_key->bvh8)
incomplete = radv_build_ray_traversal_gfx12(compiler_info, b, &args);
else
incomplete = radv_build_ray_traversal(device, b, &args);
incomplete = radv_build_ray_traversal(compiler_info, b, &args);
rq_store(b, rq, incomplete, nir_iand(b, rq_load(b, rq, incomplete), incomplete));
}
nir_pop_if(b, NULL);
@ -618,10 +613,8 @@ radv_lower_opaque_ray_query_deref(nir_builder *b, nir_deref_instr *opaque_deref,
}
bool
radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device)
radv_nir_lower_ray_queries(struct nir_shader *shader, const struct radv_compiler_info *compiler_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
bool progress = false;
struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL);
@ -629,7 +622,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
if (!var->data.ray_query)
continue;
lower_ray_query(shader, var, query_ht, pdev);
lower_ray_query(shader, var, query_ht, compiler_info);
progress = true;
}
@ -641,7 +634,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
if (!var->data.ray_query)
continue;
lower_ray_query(shader, var, query_ht, pdev);
lower_ray_query(shader, var, query_ht, compiler_info);
progress = true;
}
@ -676,13 +669,13 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
lower_rq_generate_intersection(&builder, intrinsic, rq);
break;
case nir_intrinsic_rq_initialize:
lower_rq_initialize(&builder, intrinsic, vars, rq, device);
lower_rq_initialize(&builder, intrinsic, vars, rq, compiler_info);
break;
case nir_intrinsic_rq_load:
new_dest = lower_rq_load(device, &builder, intrinsic, rq);
new_dest = lower_rq_load(compiler_info, &builder, intrinsic, rq);
break;
case nir_intrinsic_rq_proceed:
new_dest = lower_rq_proceed(&builder, intrinsic, vars, rq, device);
new_dest = lower_rq_proceed(&builder, intrinsic, vars, rq, compiler_info);
break;
case nir_intrinsic_rq_terminate:
lower_rq_terminate(&builder, intrinsic, rq);

View file

@ -4,7 +4,6 @@
* SPDX-License-Identifier: MIT
*/
#include "ac_gpu_info.h"
#include "ac_nir.h"
#include "nir.h"
#include "nir_builder.h"
@ -18,7 +17,7 @@ typedef struct {
const struct radv_shader_args *args;
const struct radv_shader_info *info;
const struct radv_graphics_state_key *gfx_state;
const struct radeon_info *gpu_info;
const struct radv_compiler_info *compiler_info;
} lower_vs_inputs_state;
static nir_def *
@ -271,12 +270,13 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
const enum pipe_format attrib_format = adjust_format(s->gfx_state->vi.vertex_attribute_formats[location]);
const struct util_format_description *f = util_format_description(attrib_format);
const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(
s->gpu_info->gfx_level, s->gpu_info->compiler_info.has_vtx_format_alpha_adjust_bug, attrib_format);
s->compiler_info->ac->gfx_level, s->compiler_info->ac->has_vtx_format_alpha_adjust_bug, attrib_format);
const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & BITFIELD_MASK(binding_index));
nir_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
nir_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->gpu_info->address32_hi));
nir_def *vertex_buffers =
nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->compiler_info->hw.address32_hi));
nir_def *descriptor =
ac_nir_load_smem(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16), 4, ACCESS_CAN_SPECULATE);
nir_def *base_index = calc_vs_input_index(b, location, s);
@ -453,8 +453,8 @@ lower_vs_input_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
}
bool
radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_stage,
const struct radv_graphics_state_key *gfx_state, const struct radeon_info *gpu_info)
radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_compiler_info *compiler_info,
const struct radv_shader_stage *vs_stage, const struct radv_graphics_state_key *gfx_state)
{
assert(shader->info.stage == MESA_SHADER_VERTEX);
@ -462,7 +462,7 @@ radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_
.info = &vs_stage->info,
.args = &vs_stage->args,
.gfx_state = gfx_state,
.gpu_info = gpu_info,
.compiler_info = compiler_info,
};
return nir_shader_intrinsics_pass(shader, lower_vs_input_instr, nir_metadata_control_flow, &state);

View file

@ -9,18 +9,19 @@
#include "nir_builder.h"
#include "radv_debug.h"
static nir_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and);
static nir_def *build_node_to_addr(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *node, bool skip_type_and);
bool
radv_use_bvh_stack_rtn(const struct radv_physical_device *pdevice)
radv_use_bvh_stack_rtn(const struct radv_compiler_info *compiler_info)
{
/* gfx12 requires using the bvh4 ds_bvh_stack_rtn differently - enable hw stack instrs on gfx12 only with bvh8 */
return ((pdevice->info.gfx_level >= GFX11 && pdevice->info.gfx_level < GFX12) || pdevice->cache_key.bvh8) &&
!pdevice->cache_key.emulate_rt;
return ((compiler_info->ac->gfx_level >= GFX11 && compiler_info->ac->gfx_level < GFX12) ||
compiler_info->cache_key->bvh8) &&
!compiler_info->cache_key->emulate_rt;
}
nir_def *
radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_physical_device *pdev, uint32_t workgroup_size,
radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_compiler_info *compiler_info, uint32_t workgroup_size,
uint32_t stack_base, uint32_t max_stack_entries)
{
assert(stack_base % 4 == 0);
@ -41,7 +42,7 @@ radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct r
}
stack_idx = nir_iadd_imm(b, stack_idx, stack_base / 4);
/* There are 4 bytes in each stack entry so no further arithmetic is needed. */
if (pdev->info.gfx_level >= GFX12)
if (compiler_info->ac->gfx_level >= GFX12)
stack_idx = nir_ishl_imm(b, stack_idx, 15);
else
stack_idx = nir_ishl_imm(b, stack_idx, 18);
@ -72,7 +73,7 @@ nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var
}
static nir_def *
intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
intersect_ray_amd_software_box(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
@ -81,7 +82,7 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_d
unsigned old_math_ctrl = b->fp_math_ctrl;
b->fp_math_ctrl |= nir_fp_exact | nir_fp_preserve_nan | nir_fp_preserve_inf;
nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
nir_def *node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
/* vec4 distances = vec4(INF, INF, INF, INF); */
nir_variable *distances = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances");
@ -190,7 +191,7 @@ radv_build_intersect_vertex(nir_builder *b, nir_def *v0_x, nir_def *v1_x, nir_de
}
static nir_def *
intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
intersect_ray_amd_software_tri(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
nir_def *origin, nir_def *dir, nir_def *inv_dir)
{
const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
@ -198,7 +199,7 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_d
unsigned old_math_ctrl = b->fp_math_ctrl;
b->fp_math_ctrl |= nir_fp_exact | nir_fp_preserve_nan | nir_fp_preserve_inf;
nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
nir_def *node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
const uint32_t coord_offsets[3] = {
offsetof(struct radv_bvh_triangle_node, coords[0]),
@ -379,15 +380,13 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_d
}
nir_def *
build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, nir_def *flags)
build_addr_to_node(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *addr, nir_def *flags)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const uint64_t bvh_size = 1ull << 42;
nir_def *node = nir_ushr_imm(b, addr, 3);
node = nir_iand_imm(b, node, (bvh_size - 1) << 3);
if (pdev->cache_key.bvh8) {
if (compiler_info->cache_key->bvh8) {
/* The HW ray flags are the same bits as the API flags.
* - SpvRayFlagsTerminateOnFirstHitKHRMask, SpvRayFlagsSkipClosestHitShaderKHRMask are handled in shader code.
* - SpvRayFlagsSkipTrianglesKHRMask, SpvRayFlagsSkipAABBsKHRMask do not work.
@ -403,14 +402,13 @@ build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, ni
}
static nir_def *
build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and)
build_node_to_addr(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *node, bool skip_type_and)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
addr = nir_ishl_imm(b, addr, 3);
/* Assumes everything is in the top half of address space, which is true in
* GFX9+ for now. */
return pdev->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
return compiler_info->ac->gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
}
nir_def *
@ -431,11 +429,9 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool tr
}
nir_def *
radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *primitive_addr, uint32_t index)
radv_load_vertex_position(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *primitive_addr, uint32_t index)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->cache_key.bvh8) {
if (compiler_info->cache_key->bvh8) {
/* Assume that vertices are uncompressed. */
uint32_t offset = ROUND_DOWN_TO(RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE / 8, 4) + index * 3 * sizeof(float);
nir_def *data[4];
@ -461,12 +457,10 @@ radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *p
}
void
radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out)
radv_load_wto_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
if (pdev->cache_key.bvh8)
if (compiler_info->cache_key->bvh8)
offset = offsetof(struct radv_gfx12_instance_node, wto_matrix);
for (unsigned i = 0; i < 3; ++i) {
@ -476,12 +470,10 @@ radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instan
}
void
radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out)
radv_load_otw_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned offset = offsetof(struct radv_bvh_instance_node, otw_matrix);
if (pdev->cache_key.bvh8)
if (compiler_info->cache_key->bvh8)
offset =
sizeof(struct radv_gfx12_instance_node) + offsetof(struct radv_gfx12_instance_node_user_data, otw_matrix);
@ -492,11 +484,9 @@ radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instan
}
nir_def *
radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *instance_addr)
radv_load_custom_instance(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->cache_key.bvh8) {
if (compiler_info->cache_key->bvh8) {
return nir_load_global(b, 1, 32,
nir_iadd_imm(b, instance_addr,
sizeof(struct radv_gfx12_instance_node) +
@ -511,11 +501,9 @@ radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *i
}
nir_def *
radv_load_instance_id(struct radv_device *device, nir_builder *b, nir_def *instance_addr)
radv_load_instance_id(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->cache_key.bvh8) {
if (compiler_info->cache_key->bvh8) {
return nir_load_global(b, 1, 32,
nir_iadd_imm(b, instance_addr,
sizeof(struct radv_gfx12_instance_node) +
@ -540,14 +528,14 @@ hit_is_opaque(nir_builder *b, nir_def *sbt_offset_and_flags, const struct radv_r
}
static nir_def *
create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, struct radv_ray_flags *ray_flags)
create_bvh_descriptor(nir_builder *b, const struct radv_compiler_info *compiler_info, struct radv_ray_flags *ray_flags)
{
/* We create a BVH descriptor that covers the entire memory range. That way we can always
* use the same descriptor, which avoids divergence when different rays hit different
* instances at the cost of having to use 64-bit node ids. */
const uint64_t bvh_size = 1ull << 42;
const uint32_t sort_triangles_first = pdev->cache_key.bvh8 ? BITFIELD_BIT(52 - 32) : 0;
const uint32_t sort_triangles_first = compiler_info->cache_key->bvh8 ? BITFIELD_BIT(52 - 32) : 0;
const uint32_t box_sort_enable = BITFIELD_BIT(63 - 32);
const uint32_t triangle_return_mode = BITFIELD_BIT(120 - 96); /* Return IJ for triangles */
@ -556,7 +544,7 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s
uint32_t dword2 = (bvh_size - 1) & 0xFFFFFFFFu;
uint32_t dword3 = ((bvh_size - 1) >> 32) | triangle_return_mode | (1u << 31);
if (pdev->info.gfx_level >= GFX11) {
if (compiler_info->ac->gfx_level >= GFX11) {
/* Enable pointer flags on GFX11+ */
dword3 |= BITFIELD_BIT(119 - 96);
@ -570,7 +558,7 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s
nir_imm_int(b, (box_sort_largest << 21) | sort_triangles_first | box_sort_enable), dword1);
}
if (pdev->cache_key.bvh8) {
if (compiler_info->cache_key->bvh8) {
/* compressed_format_en */
dword3 |= BITFIELD_BIT(115 - 96);
/* wide_sort_en */
@ -584,10 +572,9 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s
}
static void
insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
insert_traversal_triangle_case(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags, nir_def *result, nir_def *bvh_node)
{
struct radv_physical_device *pdev = radv_device_physical(device);
if (!args->triangle_cb)
return;
@ -602,7 +589,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
{
intersection.frontface = nir_fgt_imm(b, div, 0);
nir_def *not_cull;
if (pdev->info.gfx_level < GFX11 || pdev->cache_key.emulate_rt) {
if (compiler_info->ac->gfx_level < GFX11 || compiler_info->cache_key->emulate_rt) {
nir_def *switch_ccw =
nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), RADV_INSTANCE_TRIANGLE_FLIP_FACING);
intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);
@ -623,7 +610,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
nir_flt(b, args->tmin, intersection.t), not_cull));
{
intersection.base.node_addr = build_node_to_addr(device, b, bvh_node, false);
intersection.base.node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
nir_def *triangle_info = nir_load_global(
b, 2, 32,
nir_iadd_imm(b, intersection.base.node_addr, offsetof(struct radv_bvh_triangle_node, triangle_id)));
@ -648,7 +635,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
}
static void
insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b,
insert_traversal_triangle_case_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b,
const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags,
nir_variable *intrinsic_result, nir_def *result, nir_def *global_bvh_node,
nir_def *bvh_node)
@ -694,7 +681,7 @@ insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b,
nir_def *dword3 = nir_bcsel(b, triangle0, nir_channel(b, result, 3), nir_channel(b, result, 7));
intersection.frontface = nir_inot(b, nir_test_mask(b, dword3, 1));
intersection.base.node_addr = build_node_to_addr(device, b, global_bvh_node, false);
intersection.base.node_addr = build_node_to_addr(compiler_info, b, global_bvh_node, false);
intersection.base.primitive_id = nir_ishr_imm(b, dword3, 1);
intersection.base.geometry_id_and_flags =
nir_ishr_imm(b, nir_bcsel(b, triangle0, nir_channel(b, result, 8), nir_channel(b, result, 9)), 2);
@ -711,7 +698,7 @@ insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b,
}
static void
insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
insert_traversal_aabb_case(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags, nir_def *bvh_node)
{
if (!args->aabb_cb)
@ -720,7 +707,7 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const str
nir_push_if(b, ray_flags->no_skip_aabbs);
{
struct radv_leaf_intersection intersection;
intersection.node_addr = build_node_to_addr(device, b, bvh_node, false);
intersection.node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
nir_def *triangle_info = nir_load_global(
b, 2, 32, nir_iadd_imm(b, intersection.node_addr, offsetof(struct radv_bvh_aabb_node, primitive_id)));
intersection.primitive_id = nir_channel(b, triangle_info, 0);
@ -738,14 +725,14 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const str
}
static void
insert_traversal_aabb_case_gfx12(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
insert_traversal_aabb_case_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args,
const struct radv_ray_flags *ray_flags, nir_def *result, nir_def *bvh_node)
{
if (!args->aabb_cb)
return;
struct radv_leaf_intersection intersection;
intersection.node_addr = build_node_to_addr(device, b, bvh_node, false);
intersection.node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
intersection.primitive_id = nir_ishr_imm(b, nir_channel(b, result, 3), 1);
intersection.geometry_id_and_flags = nir_ishr_imm(b, nir_channel(b, result, 8), 2);
intersection.opaque = nir_inot(b, nir_test_mask(b, nir_channel(b, result, 2), 1u << 31));
@ -758,10 +745,9 @@ insert_traversal_aabb_case_gfx12(struct radv_device *device, nir_builder *b, con
}
static nir_def *
fetch_parent_node(struct radv_device *device, nir_builder *b, nir_def *bvh, nir_def *node)
fetch_parent_node(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *bvh, nir_def *node)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, pdev->cache_key.bvh8 ? 16 : 8), 4), 4);
nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, compiler_info->cache_key->bvh8 ? 16 : 8), 4), 4);
return nir_load_global(b, 1, 32, nir_isub(b, bvh, nir_u2u64(b, offset)), .align_mul = 4);
}
@ -780,10 +766,10 @@ radv_test_flag(nir_builder *b, const struct radv_ray_traversal_args *args, uint3
}
static nir_def *
build_bvh_base(nir_builder *b, const struct radv_physical_device *pdev, nir_def *base_addr, nir_def *ptr_flags,
build_bvh_base(nir_builder *b, const struct radv_compiler_info *compiler_info, nir_def *base_addr, nir_def *ptr_flags,
bool overwrite)
{
if (pdev->info.gfx_level < GFX11 || pdev->cache_key.emulate_rt)
if (compiler_info->ac->gfx_level < GFX11 || compiler_info->cache_key->emulate_rt)
return base_addr;
nir_def *base_addr_vec = nir_unpack_64_2x32(b, base_addr);
@ -796,7 +782,7 @@ build_bvh_base(nir_builder *b, const struct radv_physical_device *pdev, nir_def
}
static void
build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, const struct radv_ray_traversal_args *args,
build_instance_exit(nir_builder *b, const struct radv_compiler_info *compiler_info, const struct radv_ray_traversal_args *args,
nir_def *stack_instance_exit, nir_def *ptr_flags)
{
nir_def *root_instance_exit = nir_iand(
@ -805,7 +791,7 @@ build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, con
nir_if *instance_exit = nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
instance_exit->control = nir_selection_control_dont_flatten;
{
if (pdev->cache_key.bvh8 && args->use_bvh_stack_rtn)
if (compiler_info->cache_key->bvh8 && args->use_bvh_stack_rtn)
nir_store_deref(b, args->vars.stack,
nir_ior_imm(b, nir_load_deref(b, args->vars.stack), RADV_BVH_STACK_FLAG_TLAS_POP), 0x1);
else
@ -814,7 +800,7 @@ build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, con
nir_store_deref(b, args->vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1);
nir_def *root_bvh_base =
pdev->cache_key.bvh8 ? args->root_bvh_base : build_bvh_base(b, pdev, args->root_bvh_base, ptr_flags, true);
compiler_info->cache_key->bvh8 ? args->root_bvh_base : build_bvh_base(b, compiler_info, args->root_bvh_base, ptr_flags, true);
nir_store_deref(b, args->vars.bvh_base, root_bvh_base, 0x1);
nir_store_deref(b, args->vars.origin, args->origin, 7);
@ -825,9 +811,8 @@ build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, con
}
nir_def *
radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
radv_build_ray_traversal(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
nir_variable *intrinsic_result = nir_local_variable_create(b->impl, glsl_uvec4_type(), "intrinsic_result");
@ -849,9 +834,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_iand_imm(b, args->flags, ~(SpvRayFlagsTerminateOnFirstHitKHRMask | SpvRayFlagsSkipClosestHitShaderKHRMask));
nir_store_deref(b, args->vars.bvh_base,
build_bvh_base(b, pdev, nir_load_deref(b, args->vars.bvh_base), ptr_flags, true), 0x1);
build_bvh_base(b, compiler_info, nir_load_deref(b, args->vars.bvh_base), ptr_flags, true), 0x1);
nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags);
nir_def *desc = create_bvh_descriptor(b, compiler_info, &ray_flags);
nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
nir_loop *loop = nir_push_loop(b);
@ -868,7 +853,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_jump(b, nir_jump_break);
}
nir_pop_if(b, NULL);
build_instance_exit(b, pdev, args,
build_instance_exit(b, compiler_info, args,
nir_ilt(b, nir_load_deref(b, args->vars.stack), nir_load_deref(b, args->vars.top_stack)),
ptr_flags);
}
@ -885,7 +870,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
}
nir_pop_if(b, NULL);
build_instance_exit(
b, pdev, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)),
b, compiler_info, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)),
ptr_flags);
}
@ -901,9 +886,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
if (args->use_bvh_stack_rtn)
nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), 1), 0x1);
nir_def *prev = nir_load_deref(b, args->vars.previous_node);
nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
nir_def *bvh_addr = build_node_to_addr(compiler_info, b, nir_load_deref(b, args->vars.bvh_base), true);
nir_def *parent = fetch_parent_node(device, b, bvh_addr, prev);
nir_def *parent = fetch_parent_node(compiler_info, b, bvh_addr, prev);
nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE));
{
nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
@ -945,7 +930,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
bool has_result = false;
if (pdev->info.compiler_info.has_image_bvh_intersect_ray && !pdev->cache_key.emulate_rt) {
if (compiler_info->ac->has_image_bvh_intersect_ray && !compiler_info->cache_key->emulate_rt) {
nir_store_var(
b, intrinsic_result,
nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),
@ -961,7 +946,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
{
nir_push_if(b, nir_test_mask(b, bvh_node, BITFIELD64_BIT(ffs(radv_bvh_node_aabb) - 1)));
{
insert_traversal_aabb_case(device, b, args, &ray_flags, global_bvh_node);
insert_traversal_aabb_case(compiler_info, b, args, &ray_flags, global_bvh_node);
}
nir_push_else(b, NULL);
{
@ -972,14 +957,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
}
/* instance */
nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
nir_def *instance_node_addr = build_node_to_addr(compiler_info, b, global_bvh_node, false);
nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
nir_def *instance_data =
nir_load_global(b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
nir_def *wto_matrix[3];
radv_load_wto_matrix(device, b, instance_node_addr, wto_matrix);
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1);
@ -1006,7 +991,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
nir_def *instance_pointer = nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2));
instance_pointer = nir_iand(b, instance_pointer, instance_flag_mask);
nir_store_deref(b, args->vars.bvh_base, build_bvh_base(b, pdev, instance_pointer, ptr_flags, false),
nir_store_deref(b, args->vars.bvh_base, build_bvh_base(b, compiler_info, instance_pointer, ptr_flags, false),
0x1);
/* Push the instance root node onto the stack */
@ -1040,7 +1025,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
/* If we didn't run the intrinsic cause the hardware didn't support it,
* emulate ray/box intersection here */
result = intersect_ray_amd_software_box(
device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
compiler_info, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));
}
@ -1097,10 +1082,10 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
/* If we didn't run the intrinsic cause the hardware didn't support it,
* emulate ray/tri intersection here */
result = intersect_ray_amd_software_tri(
device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
compiler_info, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));
}
insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node);
insert_traversal_triangle_case(compiler_info, b, args, &ray_flags, result, global_bvh_node);
}
nir_pop_if(b, NULL);
@ -1131,10 +1116,8 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
}
nir_def *
radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
radv_build_ray_traversal_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
nir_variable *intrinsic_result = nir_local_variable_create(b->impl, glsl_uvec_type(8), "intrinsic_result");
@ -1152,7 +1135,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
.no_skip_aabbs = radv_test_flag(b, args, SpvRayFlagsSkipAABBsKHRMask, false),
};
nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags);
nir_def *desc = create_bvh_descriptor(b, compiler_info, &ray_flags);
nir_loop *loop = nir_push_loop(b);
{
@ -1168,7 +1151,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
nir_jump(b, nir_jump_break);
}
nir_pop_if(b, NULL);
build_instance_exit(b, pdev, args,
build_instance_exit(b, compiler_info, args,
nir_test_mask(b, nir_load_deref(b, args->vars.stack), RADV_BVH_STACK_FLAG_TLAS_POP), NULL);
}
@ -1184,7 +1167,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
}
nir_pop_if(b, NULL);
build_instance_exit(
b, pdev, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)),
b, compiler_info, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)),
NULL);
}
@ -1210,7 +1193,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
nir_imm_int(b,
(int32_t)offsetof(struct radv_gfx12_box_node, parent_id) - (radv_bvh_node_box32 << 3)));
nir_def *offset = nir_iadd(b, nir_ishl_imm(b, prev, 3), field_offset);
nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
nir_def *bvh_addr = build_node_to_addr(compiler_info, b, nir_load_deref(b, args->vars.bvh_base), true);
loaded_parent_id = nir_load_global(b, 1, 32, nir_iadd(b, bvh_addr, nir_u2u64(b, offset)));
}
nir_push_else(b, NULL);
@ -1294,7 +1277,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
nir_push_else(b, NULL);
{
/* instance */
nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
nir_def *instance_node_addr = build_node_to_addr(compiler_info, b, global_bvh_node, false);
nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, result, 6), 1);
@ -1377,13 +1360,13 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
nir_push_if(b, nir_test_mask(b, nir_channel(b, result, 1), 1u << 31));
{
nir_push_if(b, ray_flags.no_skip_aabbs);
insert_traversal_aabb_case_gfx12(device, b, args, &ray_flags, result, global_bvh_node);
insert_traversal_aabb_case_gfx12(compiler_info, b, args, &ray_flags, result, global_bvh_node);
nir_pop_if(b, NULL);
}
nir_push_else(b, NULL);
{
nir_push_if(b, ray_flags.no_skip_triangles);
insert_traversal_triangle_case_gfx12(device, b, args, &ray_flags, intrinsic_result, result, global_bvh_node,
insert_traversal_triangle_case_gfx12(compiler_info, b, args, &ray_flags, intrinsic_result, result, global_bvh_node,
bvh_node);
nir_pop_if(b, NULL);
}

View file

@ -12,27 +12,26 @@
#include "compiler/spirv/spirv.h"
struct radv_device;
struct radv_physical_device;
struct radv_compiler_info;
bool radv_use_bvh_stack_rtn(const struct radv_physical_device *pdevice);
bool radv_use_bvh_stack_rtn(const struct radv_compiler_info *compiler_info);
nir_def *radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_physical_device *pdev,
nir_def *radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_compiler_info *compiler_info,
uint32_t workgroup_size, uint32_t stack_base, uint32_t max_stack_entries);
nir_def *build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, nir_def *flags);
nir_def *build_addr_to_node(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *addr, nir_def *flags);
nir_def *nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation);
nir_def *radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *primitive_addr, uint32_t index);
nir_def *radv_load_vertex_position(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *primitive_addr, uint32_t index);
void radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out);
void radv_load_wto_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out);
void radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out);
void radv_load_otw_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out);
nir_def *radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *instance_addr);
nir_def *radv_load_custom_instance(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr);
nir_def *radv_load_instance_id(struct radv_device *device, nir_builder *b, nir_def *instance_addr);
nir_def *radv_load_instance_id(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr);
struct radv_ray_traversal_args;
@ -162,10 +161,10 @@ struct radv_ray_traversal_args {
* rayQueryProceedEXT. Traversal will only be considered incomplete, if one of the specified
* callbacks breaks out of the traversal loop.
*/
nir_def *radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
nir_def *radv_build_ray_traversal(const struct radv_compiler_info *compiler_info, nir_builder *b,
const struct radv_ray_traversal_args *args);
nir_def *radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b,
nir_def *radv_build_ray_traversal_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b,
const struct radv_ray_traversal_args *args);
#endif /* RADV_NIR_RT_COMMON_H */

View file

@ -257,11 +257,9 @@ radv_nir_return_param_from_type(nir_parameter *param, const glsl_type *type, boo
}
void
radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage, bool uses_descriptor_heap,
struct radv_shader_debug_info *debug)
radv_build_rt_prolog(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage,
bool uses_descriptor_heap, struct radv_shader_debug_info *debug)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_builder b = radv_meta_nir_init_shader(MESA_SHADER_COMPUTE, "rt_prolog");
stage->stage = MESA_SHADER_COMPUTE;
stage->nir = b.shader;
@ -270,24 +268,25 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
stage->info.loads_dynamic_offsets = true;
stage->info.force_indirect_descriptors = true;
stage->info.descriptor_heap = uses_descriptor_heap;
stage->info.wave_size = pdev->rt_wave_size;
stage->info.wave_size = compiler_info->rt_wave_size;
stage->info.workgroup_size = stage->info.wave_size;
stage->info.user_data_0 = R_00B900_COMPUTE_USER_DATA_0;
stage->info.type = RADV_SHADER_TYPE_RT_PROLOG;
stage->info.cs.block_size[0] = pdev->rt_wave_size;
stage->info.cs.block_size[0] = compiler_info->rt_wave_size;
stage->info.cs.block_size[1] = 1;
stage->info.cs.block_size[2] = 1;
stage->info.cs.uses_thread_id[0] = true;
for (unsigned i = 0; i < 3; i++)
stage->info.cs.uses_block_id[i] = true;
radv_declare_shader_args(device, NULL, &stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &stage->args, debug);
radv_declare_shader_args(compiler_info, NULL, &stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &stage->args,
debug);
stage->info.user_sgprs_locs = stage->args.user_sgprs_locs;
b.shader->info.workgroup_size[0] = pdev->rt_wave_size;
b.shader->info.api_subgroup_size = pdev->rt_wave_size;
b.shader->info.max_subgroup_size = pdev->rt_wave_size;
b.shader->info.min_subgroup_size = pdev->rt_wave_size;
b.shader->info.workgroup_size[0] = compiler_info->rt_wave_size;
b.shader->info.api_subgroup_size = compiler_info->rt_wave_size;
b.shader->info.max_subgroup_size = compiler_info->rt_wave_size;
b.shader->info.min_subgroup_size = compiler_info->rt_wave_size;
nir_function *raygen_function = nir_function_create(b.shader, "raygen_func");
radv_nir_init_rt_function_params(raygen_function, MESA_SHADER_RAYGEN, 0, 0, uses_descriptor_heap);
@ -306,7 +305,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
nir_def *launch_size_addr = nir_pack_64_2x32(&b, ac_nir_load_arg(&b, &stage->args.ac, stage->args.ac.rt.launch_size_addr));
nir_def *traversal_addr =
nir_pack_64_2x32_split(&b, ac_nir_load_arg(&b, &stage->args.ac, stage->args.ac.rt.traversal_shader_addr),
nir_imm_int(&b, pdev->info.address32_hi));
nir_imm_int(&b, compiler_info->hw.address32_hi));
nir_def *raygen_sbt = nir_pack_64_2x32(&b, ac_nir_load_smem(&b, 2, sbt_desc, nir_imm_int(&b, 0), 4, 0));
nir_def *launch_sizes = ac_nir_load_smem(&b, 3, launch_size_addr, nir_imm_int(&b, 0), 4, 0);
@ -320,7 +319,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
nir_def *local_id = nir_channel(&b, nir_load_local_invocation_id(&b), 0);
nir_def *unswizzled_id_x = nir_iadd(&b, nir_imul_imm(&b, wg_ids[0], pdev->rt_wave_size), local_id);
nir_def *unswizzled_id_x = nir_iadd(&b, nir_imul_imm(&b, wg_ids[0], compiler_info->rt_wave_size), local_id);
nir_def *unswizzled_id_y = wg_ids[1];
/* Swizzle ray launch IDs. We dispatch a 1D 32x1/64x1 workgroup natively. Many games dispatch
@ -366,7 +365,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
swizzled_id_y = nir_bitfield_select(&b, nir_imm_int(&b, 0x3), swizzled_id_y, swizzled_id_shifted_y);
uint32_t workgroup_width = 8;
uint32_t workgroup_height = pdev->rt_wave_size == 32 ? 4 : 8;
uint32_t workgroup_height = compiler_info->rt_wave_size == 32 ? 4 : 8;
uint32_t workgroup_height_mask = workgroup_height - 1;
/* Fix up the workgroup IDs after converting from 32x1/64x1 to 8x4/8x8. The X dimension of the
@ -376,7 +375,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
* the fact we divided the X component of the ID.
*/
nir_def *wg_id_y_rem = nir_iand_imm(&b, wg_ids[1], workgroup_height_mask);
nir_def *new_wg_start_x = nir_imul_imm(&b, wg_ids[0], pdev->rt_wave_size);
nir_def *new_wg_start_x = nir_imul_imm(&b, wg_ids[0], compiler_info->rt_wave_size);
new_wg_start_x = nir_iadd(&b, new_wg_start_x, nir_imul_imm(&b, wg_id_y_rem, workgroup_width));
nir_def *new_wg_start_y = nir_iand_imm(&b, wg_ids[1], ~workgroup_height_mask);
@ -393,7 +392,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
/* If parts of this wave would've exceeded the launch size in the X dimension, their threads will be masked out and
* exec won't equal -1. In that case, using swizzled IDs is invalid.
*/
nir_def *partial_oob_x = nir_ine_imm(&b, nir_ballot(&b, 1, pdev->rt_wave_size, nir_imm_true(&b)), -1);
nir_def *partial_oob_x = nir_ine_imm(&b, nir_ballot(&b, 1, compiler_info->rt_wave_size, nir_imm_true(&b)), -1);
nir_def *partial_oob_y = nir_uge(&b, wg_ids[1], y_wg_bound);
nir_def *partial_oob = nir_ior(&b, partial_oob_x, partial_oob_y);

View file

@ -41,7 +41,7 @@ typedef struct glsl_type glsl_type;
*/
struct radv_rt_case_data {
struct radv_device *device;
const struct radv_compiler_info *compiler_info;
struct radv_ray_tracing_pipeline *pipeline;
void *param_data;
};
@ -154,12 +154,12 @@ struct radv_nir_rt_traversal_result {
nir_variable *barycentrics;
};
struct radv_nir_rt_traversal_result radv_build_traversal(struct radv_device *device,
struct radv_nir_rt_traversal_result radv_build_traversal(const struct radv_compiler_info *compiler_info,
struct radv_ray_tracing_pipeline *pipeline, nir_builder *b,
struct radv_nir_rt_traversal_params *params,
struct radv_ray_tracing_stage_info *info);
void radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage, bool uses_descriptor_heap,
struct radv_shader_debug_info *debug);
void radv_build_rt_prolog(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage,
bool uses_descriptor_heap, struct radv_shader_debug_info *debug);
#endif // MESA_RADV_NIR_RT_STAGE_COMMON_H

View file

@ -15,9 +15,7 @@
#include "ac_nir.h"
#include "aco_nir_call_attribs.h"
#include "radv_device.h"
#include "radv_nir_rt_stage_functions.h"
#include "radv_physical_device.h"
#include "radv_pipeline_rt.h"
#include "radv_shader.h"
@ -69,7 +67,7 @@ radv_gather_unused_args(struct radv_ray_tracing_stage_info *info, nir_shader *ni
* Global variables for an RT pipeline
*/
struct rt_variables {
struct radv_device *device;
const struct radv_compiler_info *compiler_info;
const VkPipelineCreateFlags2 flags;
nir_variable *shader_addr;
@ -107,10 +105,11 @@ struct rt_variables {
};
static struct rt_variables
create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipelineCreateFlags2 flags)
create_rt_variables(nir_shader *shader, const struct radv_compiler_info *compiler_info,
const VkPipelineCreateFlags2 flags)
{
struct rt_variables vars = {
.device = device,
.compiler_info = compiler_info,
.flags = flags,
};
vars.shader_addr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_addr");
@ -184,6 +183,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
struct radv_lower_rt_instruction_data *data = _data;
struct rt_variables *vars = data->vars;
const struct radv_compiler_info *compiler_info = vars->compiler_info;
b->cursor = nir_before_instr(&intr->instr);
@ -297,7 +297,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
break;
}
case nir_intrinsic_load_ray_instance_custom_index: {
ret = radv_load_custom_instance(vars->device, b, nir_load_var(b, vars->instance_addr));
ret = radv_load_custom_instance(compiler_info, b, nir_load_var(b, vars->instance_addr));
break;
}
case nir_intrinsic_load_primitive_id: {
@ -310,7 +310,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
break;
}
case nir_intrinsic_load_instance_id: {
ret = radv_load_instance_id(vars->device, b, nir_load_var(b, vars->instance_addr));
ret = radv_load_instance_id(compiler_info, b, nir_load_var(b, vars->instance_addr));
break;
}
case nir_intrinsic_load_ray_flags: {
@ -325,7 +325,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
unsigned c = nir_intrinsic_column(intr);
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
nir_def *wto_matrix[3];
radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix);
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
nir_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
@ -337,20 +337,20 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
case nir_intrinsic_load_ray_object_to_world: {
unsigned c = nir_intrinsic_column(intr);
nir_def *otw_matrix[3];
radv_load_otw_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), otw_matrix);
radv_load_otw_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), otw_matrix);
ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c),
nir_channel(b, otw_matrix[2], c));
break;
}
case nir_intrinsic_load_ray_object_origin: {
nir_def *wto_matrix[3];
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix);
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->origin), wto_matrix, true);
break;
}
case nir_intrinsic_load_ray_object_direction: {
nir_def *wto_matrix[3];
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix);
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->direction), wto_matrix, false);
break;
}
@ -430,7 +430,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
}
case nir_intrinsic_load_ray_triangle_vertex_positions: {
nir_def *primitive_addr = nir_load_var(b, vars->primitive_addr);
ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr));
ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr));
break;
}
default:
@ -554,7 +554,7 @@ init_cps_function(nir_function *function, bool has_position_fetch, bool uses_des
void
radv_nir_lower_rt_abi_cps(nir_shader *shader, const struct radv_shader_info *info, bool resume_shader,
struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline,
bool has_position_fetch, const struct radv_ray_tracing_stage_info *traversal_info)
{
const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
@ -574,7 +574,7 @@ radv_nir_lower_rt_abi_cps(nir_shader *shader, const struct radv_shader_info *inf
impl->function->params[idx].driver_attributes |= ACO_NIR_PARAM_ATTRIB_DISCARDABLE;
}
struct rt_variables vars = create_rt_variables(shader, device, pipeline->base.base.create_flags);
struct rt_variables vars = create_rt_variables(shader, compiler_info, pipeline->base.base.create_flags);
struct radv_rt_shader_info rt_info = {0};

View file

@ -14,8 +14,9 @@
void radv_gather_unused_args(struct radv_ray_tracing_stage_info *info, nir_shader *nir);
void radv_nir_lower_rt_abi_cps(nir_shader *shader, const struct radv_shader_info *info, bool resume_shader,
struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
bool has_position_fetch, const struct radv_ray_tracing_stage_info *traversal_info);
const struct radv_compiler_info *compiler_info,
struct radv_ray_tracing_pipeline *pipeline, bool has_position_fetch,
const struct radv_ray_tracing_stage_info *traversal_info);
void radv_nir_lower_rt_io_cps(nir_shader *shader);
#endif // RADV_NIR_RT_STAGE_CPS_H

View file

@ -13,8 +13,6 @@
#include "nir/radv_nir_rt_stage_common.h"
#include "nir/radv_nir_rt_stage_functions.h"
#include "radv_device.h"
#include "radv_physical_device.h"
#include "radv_shader.h"
#include "aco_nir_call_attribs.h"
@ -172,7 +170,7 @@ radv_nir_init_rt_function_params(nir_function *function, mesa_shader_stage stage
* Global variables for an RT pipeline
*/
struct rt_variables {
struct radv_device *device;
const struct radv_compiler_info *compiler_info;
const VkPipelineCreateFlags2 flags;
/* Stage-dependent parameter indices */
@ -214,12 +212,12 @@ struct rt_variables {
};
static struct rt_variables
create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipelineCreateFlags2 flags,
unsigned max_payload_size, unsigned max_hit_attrib_size)
create_rt_variables(nir_shader *shader, const struct radv_compiler_info *compiler_info,
const VkPipelineCreateFlags2 flags, unsigned max_payload_size, unsigned max_hit_attrib_size)
{
const bool uses_descriptor_heap = flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
struct rt_variables vars = {
.device = device,
.compiler_info = compiler_info,
.flags = flags,
};
@ -349,6 +347,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
struct rt_variables *vars = _vars;
const bool uses_descriptor_heap = vars->flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
const struct radv_compiler_info *compiler_info = vars->compiler_info;
b->cursor = nir_before_instr(&intr->instr);
@ -452,7 +451,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
break;
}
case nir_intrinsic_load_ray_instance_custom_index: {
ret = radv_load_custom_instance(vars->device, b, nir_load_param(b, vars->instance_addr_param));
ret = radv_load_custom_instance(compiler_info, b, nir_load_param(b, vars->instance_addr_param));
break;
}
case nir_intrinsic_load_primitive_id: {
@ -465,7 +464,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
break;
}
case nir_intrinsic_load_instance_id: {
ret = radv_load_instance_id(vars->device, b, nir_load_param(b, vars->instance_addr_param));
ret = radv_load_instance_id(compiler_info, b, nir_load_param(b, vars->instance_addr_param));
break;
}
case nir_intrinsic_load_ray_flags: {
@ -483,7 +482,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
unsigned c = nir_intrinsic_column(intr);
nir_def *instance_node_addr = nir_load_param(b, vars->instance_addr_param);
nir_def *wto_matrix[3];
radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix);
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
nir_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
@ -495,20 +494,20 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
case nir_intrinsic_load_ray_object_to_world: {
unsigned c = nir_intrinsic_column(intr);
nir_def *otw_matrix[3];
radv_load_otw_matrix(vars->device, b, nir_load_param(b, vars->instance_addr_param), otw_matrix);
radv_load_otw_matrix(compiler_info, b, nir_load_param(b, vars->instance_addr_param), otw_matrix);
ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c),
nir_channel(b, otw_matrix[2], c));
break;
}
case nir_intrinsic_load_ray_object_origin: {
nir_def *wto_matrix[3];
radv_load_wto_matrix(vars->device, b, nir_load_param(b, vars->instance_addr_param), wto_matrix);
radv_load_wto_matrix(compiler_info, b, nir_load_param(b, vars->instance_addr_param), wto_matrix);
ret = nir_build_vec3_mat_mult(b, nir_load_param(b, vars->ray_origin_param), wto_matrix, true);
break;
}
case nir_intrinsic_load_ray_object_direction: {
nir_def *wto_matrix[3];
radv_load_wto_matrix(vars->device, b, nir_load_param(b, vars->instance_addr_param), wto_matrix);
radv_load_wto_matrix(compiler_info, b, nir_load_param(b, vars->instance_addr_param), wto_matrix);
ret = nir_build_vec3_mat_mult(b, nir_load_param(b, vars->ray_direction_param), wto_matrix, false);
break;
}
@ -713,7 +712,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
}
case nir_intrinsic_load_ray_triangle_vertex_positions: {
nir_def *primitive_addr = nir_load_param(b, vars->primitive_addr_param);
ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr));
ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr));
break;
}
default:
@ -852,7 +851,7 @@ radv_get_rt_shader_entrypoint(nir_shader *shader)
void
radv_nir_lower_rt_abi_functions(nir_shader *shader, const struct radv_shader_info *info, uint32_t payload_size,
uint32_t hit_attrib_size, struct radv_device *device,
uint32_t hit_attrib_size, const struct radv_compiler_info *compiler_info,
struct radv_ray_tracing_pipeline *pipeline)
{
const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
@ -866,7 +865,7 @@ radv_nir_lower_rt_abi_functions(nir_shader *shader, const struct radv_shader_inf
uses_descriptor_heap);
struct rt_variables vars =
create_rt_variables(shader, device, pipeline->base.base.create_flags, payload_size, hit_attrib_size);
create_rt_variables(shader, compiler_info, pipeline->base.base.create_flags, payload_size, hit_attrib_size);
nir_builder b = nir_builder_at(nir_before_impl(impl));
unsigned num_hit_attribs = DIV_ROUND_UP(hit_attrib_size, 4);

View file

@ -17,7 +17,7 @@ void radv_nir_init_rt_function_params(nir_function *function, mesa_shader_stage
unsigned hit_attrib_size, bool uses_descriptor_heap);
void radv_nir_lower_rt_abi_functions(nir_shader *shader, const struct radv_shader_info *info, uint32_t payload_size,
uint32_t hit_attrib_size, struct radv_device *device,
uint32_t hit_attrib_size, const struct radv_compiler_info *compiler_info,
struct radv_ray_tracing_pipeline *pipeline);
void radv_nir_lower_rt_io_functions(nir_shader *shader);

View file

@ -11,12 +11,10 @@
#include "aco_nir_call_attribs.h"
#include "nir_builder.h"
#include "radv_device.h"
#include "radv_nir_rt_stage_functions.h"
#include "radv_physical_device.h"
struct chit_miss_inlining_params {
struct radv_device *device;
const struct radv_compiler_info *compiler_info;
struct radv_nir_rt_traversal_params *trav_params;
struct radv_nir_rt_traversal_result *trav_result;
@ -27,7 +25,7 @@ struct chit_miss_inlining_params {
};
struct chit_miss_inlining_vars {
struct radv_device *device;
const struct radv_compiler_info *compiler_info;
nir_variable *shader_record_ptr;
nir_variable *origin;
@ -104,6 +102,7 @@ static bool
lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void *_vars)
{
struct chit_miss_inlining_vars *vars = _vars;
const struct radv_compiler_info *compiler_info = vars->compiler_info;
b->cursor = nir_after_instr(&intr->instr);
@ -125,13 +124,13 @@ lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void *
ret = nir_load_var(b, vars->tmin);
break;
case nir_intrinsic_load_ray_instance_custom_index:
ret = radv_load_custom_instance(vars->device, b, nir_load_var(b, vars->instance_addr));
ret = radv_load_custom_instance(compiler_info, b, nir_load_var(b, vars->instance_addr));
break;
case nir_intrinsic_load_primitive_id:
ret = nir_load_var(b, vars->primitive_id);
break;
case nir_intrinsic_load_instance_id:
ret = radv_load_instance_id(vars->device, b, nir_load_var(b, vars->instance_addr));
ret = radv_load_instance_id(compiler_info, b, nir_load_var(b, vars->instance_addr));
break;
case nir_intrinsic_load_ray_hit_kind:
ret = nir_load_var(b, vars->hit_kind);
@ -151,7 +150,7 @@ lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void *
unsigned c = nir_intrinsic_column(intr);
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
nir_def *wto_matrix[3];
radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix);
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
nir_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
@ -163,26 +162,26 @@ lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void *
case nir_intrinsic_load_ray_object_to_world: {
unsigned c = nir_intrinsic_column(intr);
nir_def *otw_matrix[3];
radv_load_otw_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), otw_matrix);
radv_load_otw_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), otw_matrix);
ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c),
nir_channel(b, otw_matrix[2], c));
break;
}
case nir_intrinsic_load_ray_object_origin: {
nir_def *wto_matrix[3];
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix);
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->origin), wto_matrix, true);
break;
}
case nir_intrinsic_load_ray_object_direction: {
nir_def *wto_matrix[3];
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix);
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->direction), wto_matrix, false);
break;
}
case nir_intrinsic_load_ray_triangle_vertex_positions: {
nir_def *primitive_addr = nir_load_var(b, vars->primitive_addr);
ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr));
ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr));
break;
}
default:
@ -238,7 +237,6 @@ radv_nir_lower_rt_io_monolithic(nir_shader *nir)
}
struct rt_variables {
struct radv_device *device;
const VkPipelineCreateFlags2 flags;
uint32_t payload_offset;
@ -252,13 +250,13 @@ radv_build_recursive_case(nir_builder *b, nir_def *idx, struct radv_ray_tracing_
struct radv_rt_case_data *data)
{
nir_shader *shader =
radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->recursive_shader].nir);
radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->recursive_shader].nir);
assert(shader);
struct chit_miss_inlining_params *params = data->param_data;
struct chit_miss_inlining_vars vars = {
.device = params->device,
.compiler_info = params->compiler_info,
};
nir_opt_dead_cf(shader);
@ -283,7 +281,7 @@ radv_build_recursive_case(nir_builder *b, nir_def *idx, struct radv_ray_tracing_
}
struct lower_rt_instruction_monolithic_state {
struct radv_device *device;
const struct radv_compiler_info *compiler_info;
struct radv_ray_tracing_pipeline *pipeline;
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo;
@ -296,8 +294,8 @@ lower_rt_call_monolithic(nir_builder *b, nir_intrinsic_instr *intr, void *data)
b->cursor = nir_after_instr(&intr->instr);
struct lower_rt_instruction_monolithic_state *state = data;
const struct radv_physical_device *pdev = radv_device_physical(state->device);
struct rt_variables *vars = state->vars;
const struct radv_compiler_info *compiler_info = state->compiler_info;
switch (intr->intrinsic) {
case nir_intrinsic_execute_callable:
@ -333,19 +331,19 @@ lower_rt_call_monolithic(nir_builder *b, nir_intrinsic_instr *intr, void *data)
nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, vars->stack_size), 0x1);
struct radv_nir_rt_traversal_result result =
radv_build_traversal(state->device, state->pipeline, b, &params, NULL);
radv_build_traversal(compiler_info, state->pipeline, b, &params, NULL);
nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1);
struct chit_miss_inlining_params inline_params = {
.device = state->device,
.compiler_info = compiler_info,
.trav_params = &params,
.trav_result = &result,
.payload_offset = vars->payload_offset,
};
struct radv_rt_case_data case_data = {
.device = state->device,
.compiler_info = compiler_info,
.pipeline = state->pipeline,
.param_data = &inline_params,
};
@ -383,7 +381,7 @@ lower_rt_call_monolithic(nir_builder *b, nir_intrinsic_instr *intr, void *data)
nir_pop_if(b, NULL);
b->shader->info.shared_size =
MAX2(b->shader->info.shared_size, pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
MAX2(b->shader->info.shared_size, compiler_info->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
nir_instr_remove(&intr->instr);
return true;
@ -455,7 +453,7 @@ radv_count_ray_payload_size(nir_builder *b, nir_intrinsic_instr *instr, void *da
}
void
radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device,
radv_nir_lower_rt_abi_monolithic(nir_shader *shader, const struct radv_compiler_info *compiler_info,
struct radv_ray_tracing_pipeline *pipeline)
{
const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
@ -465,7 +463,6 @@ radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device,
nir_builder b = nir_builder_at(nir_before_impl(impl));
struct rt_variables vars = {
.device = device,
.flags = pipeline->base.base.create_flags,
.stack_size = b.shader->scratch_size,
};
@ -474,7 +471,7 @@ radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device,
nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1);
struct lower_rt_instruction_monolithic_state state = {
.device = device,
.compiler_info = compiler_info,
.pipeline = pipeline,
.vars = &vars,
};

View file

@ -11,7 +11,7 @@
#include "radv_pipeline_rt.h"
void radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device,
void radv_nir_lower_rt_abi_monolithic(nir_shader *shader, const struct radv_compiler_info *compiler_info,
struct radv_ray_tracing_pipeline *pipeline);
void radv_nir_lower_rt_io_monolithic(nir_shader *shader);

View file

@ -13,7 +13,6 @@
#include "aco_nir_call_attribs.h"
#include "nir_builder.h"
#include "radv_device.h"
#include "radv_meta_nir.h"
#include "radv_nir_rt_stage_functions.h"
#include "radv_physical_device.h"
@ -64,7 +63,7 @@ struct anyhit_shader_vars {
/* Parameters passed through to an inlined any-hit/intersection shader */
struct traversal_inlining_params {
struct radv_device *device;
const struct radv_compiler_info *compiler_info;
radv_nir_ahit_isec_preprocess_cb preprocess;
void *preprocess_data;
@ -76,7 +75,7 @@ struct traversal_inlining_params {
/* Data about ray traversal passed through to AABB/Intersection callbacks */
struct traversal_data {
struct radv_device *device;
const struct radv_compiler_info *compiler_info;
struct radv_nir_rt_traversal_params *params;
struct traversal_vars trav_vars;
@ -223,6 +222,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
b->cursor = nir_after_instr(&intr->instr);
struct traversal_inlining_params *params = _params;
const struct radv_compiler_info *compiler_info = params->compiler_info;
nir_def *ret = NULL;
switch (intr->intrinsic) {
@ -252,13 +252,13 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
ret = nir_load_var(b, params->anyhit_vars->tmin);
break;
case nir_intrinsic_load_ray_instance_custom_index:
ret = radv_load_custom_instance(params->device, b, nir_load_var(b, params->candidate->instance_addr));
ret = radv_load_custom_instance(compiler_info, b, nir_load_var(b, params->candidate->instance_addr));
break;
case nir_intrinsic_load_primitive_id:
ret = nir_load_var(b, params->candidate->primitive_id);
break;
case nir_intrinsic_load_instance_id:
ret = radv_load_instance_id(params->device, b, nir_load_var(b, params->candidate->instance_addr));
ret = radv_load_instance_id(compiler_info, b, nir_load_var(b, params->candidate->instance_addr));
break;
case nir_intrinsic_load_ray_hit_kind:
ret = nir_load_var(b, params->candidate->hit_kind);
@ -278,7 +278,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
unsigned c = nir_intrinsic_column(intr);
nir_def *instance_node_addr = nir_load_var(b, params->candidate->instance_addr);
nir_def *wto_matrix[3];
radv_load_wto_matrix(params->device, b, instance_node_addr, wto_matrix);
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
nir_def *vals[3];
for (unsigned i = 0; i < 3; ++i)
@ -290,7 +290,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
case nir_intrinsic_load_ray_object_to_world: {
unsigned c = nir_intrinsic_column(intr);
nir_def *otw_matrix[3];
radv_load_otw_matrix(params->device, b, nir_load_var(b, params->candidate->instance_addr), otw_matrix);
radv_load_otw_matrix(compiler_info, b, nir_load_var(b, params->candidate->instance_addr), otw_matrix);
ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c),
nir_channel(b, otw_matrix[2], c));
break;
@ -346,7 +346,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
}
case nir_intrinsic_load_ray_triangle_vertex_positions: {
nir_def *primitive_addr = nir_load_var(b, params->candidate->primitive_addr);
ret = radv_load_vertex_position(params->device, b, primitive_addr, nir_intrinsic_column(intr));
ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr));
break;
}
default:
@ -371,7 +371,7 @@ insert_inlined_shader(nir_builder *b, struct traversal_inlining_params *params,
* duplicates to the original variables passed through in 'params'.
*/
struct traversal_inlining_params src_params = {
.device = params->device,
.compiler_info = params->compiler_info,
};
struct traversal_vars src_trav_vars;
@ -647,7 +647,7 @@ radv_build_ahit_case(nir_builder *b, nir_def *sbt_idx, struct radv_ray_tracing_g
struct traversal_inlining_params *params = data->param_data;
nir_shader *nir_stage =
radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->any_hit_shader].nir);
radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->any_hit_shader].nir);
assert(nir_stage);
params->preprocess(nir_stage, params->preprocess_data);
@ -673,7 +673,7 @@ radv_build_isec_case(nir_builder *b, nir_def *sbt_idx, struct radv_ray_tracing_g
struct traversal_inlining_params *params = data->param_data;
nir_shader *nir_stage =
radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->intersection_shader].nir);
radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->intersection_shader].nir);
assert(nir_stage);
params->preprocess(nir_stage, params->preprocess_data);
@ -681,7 +681,7 @@ radv_build_isec_case(nir_builder *b, nir_def *sbt_idx, struct radv_ray_tracing_g
nir_shader *any_hit_stage = NULL;
if (group->any_hit_shader != VK_SHADER_UNUSED_KHR) {
any_hit_stage =
radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->any_hit_shader].nir);
radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->any_hit_shader].nir);
assert(any_hit_stage);
params->preprocess(any_hit_stage, params->preprocess_data);
@ -698,7 +698,7 @@ static nir_def *
radv_build_token_begin(nir_builder *b, struct traversal_data *data, nir_def *hit,
enum radv_packed_token_type token_type, nir_def *token_size, uint32_t max_token_size)
{
struct radv_rra_trace_data *rra_trace = &data->device->rra_trace;
struct radv_rra_trace_data *rra_trace = data->compiler_info->rra_trace;
assert(rra_trace->ray_history_addr);
assert(rra_trace->ray_history_buffer_size >= max_token_size);
@ -767,7 +767,7 @@ radv_build_end_trace_token(nir_builder *b, struct traversal_data *data, nir_def
dst_addr = nir_iadd_imm(b, dst_addr, 8);
nir_def *dispatch_indices =
ac_nir_load_smem(b, 2, nir_imm_int64(b, data->device->rra_trace.ray_history_addr),
ac_nir_load_smem(b, 2, nir_imm_int64(b, data->compiler_info->rra_trace->ray_history_addr),
nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), 4, 0);
nir_def *dispatch_index = nir_iadd(b, nir_channel(b, dispatch_indices, 0), nir_channel(b, dispatch_indices, 1));
nir_def *dispatch_and_flags = nir_iand_imm(b, data->params->cull_mask_and_flags, 0xFFFF);
@ -883,7 +883,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
nir_store_var(b, ahit_vars.shader_record_ptr, sbt_data.shader_record_ptr, 0x1);
struct traversal_inlining_params inlining_params = {
.device = data->device,
.compiler_info = data->compiler_info,
.trav_vars = &data->trav_vars,
.candidate = &candidate_result,
.anyhit_vars = &ahit_vars,
@ -892,7 +892,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
};
struct radv_rt_case_data case_data = {
.device = data->device,
.compiler_info = data->compiler_info,
.pipeline = data->pipeline,
.param_data = &inlining_params,
};
@ -1040,7 +1040,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
nir_store_var(b, ahit_vars.shader_record_ptr, sbt_data.shader_record_ptr, 0x1);
struct traversal_inlining_params inlining_params = {
.device = data->device,
.compiler_info = data->compiler_info,
.trav_vars = &data->trav_vars,
.candidate = &candidate_result,
.anyhit_vars = &ahit_vars,
@ -1049,7 +1049,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
};
struct radv_rt_case_data case_data = {
.device = data->device,
.compiler_info = data->compiler_info,
.pipeline = data->pipeline,
.param_data = &inlining_params,
};
@ -1148,17 +1148,17 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal
}
struct radv_nir_rt_traversal_result
radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, nir_builder *b,
struct radv_nir_rt_traversal_params *params, struct radv_ray_tracing_stage_info *info)
radv_build_traversal(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline,
nir_builder *b, struct radv_nir_rt_traversal_params *params,
struct radv_ray_tracing_stage_info *info)
{
const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
const struct radv_physical_device *pdev = radv_device_physical(device);
nir_variable *barycentrics =
nir_variable_create(b->shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics");
barycentrics->data.driver_location = 0;
struct traversal_data data = {
.device = device,
.compiler_info = compiler_info,
.params = params,
.pipeline = pipeline,
};
@ -1202,25 +1202,27 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
}
nir_push_else(b, NULL);
{
nir_store_var(b, data.trav_vars.current_node,
nir_imm_int(b, radv_use_bvh_stack_rtn(pdev) ? RADV_BVH_STACK_TERMINAL_NODE : RADV_BVH_INVALID_NODE),
0x1);
nir_store_var(
b, data.trav_vars.current_node,
nir_imm_int(b, radv_use_bvh_stack_rtn(compiler_info) ? RADV_BVH_STACK_TERMINAL_NODE : RADV_BVH_INVALID_NODE),
0x1);
}
nir_pop_if(b, NULL);
bvh_offset = nir_if_phi(b, bvh_offset, zero);
nir_def *root_bvh_base = nir_iadd(b, params->accel_struct, nir_u2u64(b, bvh_offset));
root_bvh_base = build_addr_to_node(device, b, root_bvh_base, params->cull_mask_and_flags);
root_bvh_base = build_addr_to_node(compiler_info, b, root_bvh_base, params->cull_mask_and_flags);
nir_def *stack_idx = nir_load_subgroup_invocation(b);
uint32_t stack_stride;
if (radv_use_bvh_stack_rtn(pdev)) {
stack_idx = radv_build_bvh_stack_rtn_addr(b, stack_idx, pdev, pdev->rt_wave_size, 0, MAX_STACK_ENTRY_COUNT);
if (radv_use_bvh_stack_rtn(compiler_info)) {
stack_idx = radv_build_bvh_stack_rtn_addr(b, stack_idx, compiler_info, compiler_info->rt_wave_size, 0,
MAX_STACK_ENTRY_COUNT);
stack_stride = 1;
} else {
stack_idx = nir_imul_imm(b, stack_idx, sizeof(uint32_t));
stack_stride = pdev->rt_wave_size * sizeof(uint32_t);
stack_stride = compiler_info->rt_wave_size * sizeof(uint32_t);
}
nir_store_var(b, data.trav_vars.result.hit, nir_imm_false(b), 1);
@ -1245,7 +1247,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
nir_store_var(b, data.trav_vars.top_stack, nir_imm_int(b, -1), 1);
nir_variable *iteration_instance_count = NULL;
if (device->rra_trace.ray_history_addr) {
if (compiler_info->rra_trace->ray_history_addr) {
data.trav_vars.ahit_isec_count =
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "ahit_isec_count");
iteration_instance_count =
@ -1277,16 +1279,16 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
.triangle_cb = (pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR)
? NULL
: handle_candidate_triangle,
.use_bvh_stack_rtn = radv_use_bvh_stack_rtn(pdev),
.use_bvh_stack_rtn = radv_use_bvh_stack_rtn(compiler_info),
.data = &data,
};
if (pdev->cache_key.bvh8)
radv_build_ray_traversal_gfx12(device, b, &args);
if (compiler_info->cache_key->bvh8)
radv_build_ray_traversal_gfx12(compiler_info, b, &args);
else
radv_build_ray_traversal(device, b, &args);
radv_build_ray_traversal(compiler_info, b, &args);
if (device->rra_trace.ray_history_addr)
if (compiler_info->rra_trace->ray_history_addr)
radv_build_end_trace_token(b, &data, nir_load_var(b, iteration_instance_count));
nir_progress(true, b->impl, nir_metadata_none);
@ -1304,21 +1306,19 @@ preprocess_traversal_shader_ahit_isec(nir_shader *nir, void *cb)
}
nir_shader *
radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
radv_build_traversal_shader(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline,
struct radv_ray_tracing_stage_info *info, radv_nir_traversal_preprocess_cb preprocess,
uint32_t payload_size, uint32_t hit_attrib_size)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
/* Create the traversal shader as an intersection shader to prevent validation failures due to
* invalid variable modes.*/
nir_builder b = radv_meta_nir_init_shader(MESA_SHADER_INTERSECTION, "rt_traversal");
b.shader->options = &pdev->nir_options[MESA_SHADER_INTERSECTION];
b.shader->info.workgroup_size[0] = pdev->rt_wave_size;
b.shader->info.api_subgroup_size = pdev->rt_wave_size;
b.shader->info.max_subgroup_size = pdev->rt_wave_size;
b.shader->info.min_subgroup_size = pdev->rt_wave_size;
b.shader->info.shared_size = pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
b.shader->options = &compiler_info->nir_options[MESA_SHADER_INTERSECTION];
b.shader->info.workgroup_size[0] = compiler_info->rt_wave_size;
b.shader->info.api_subgroup_size = compiler_info->rt_wave_size;
b.shader->info.max_subgroup_size = compiler_info->rt_wave_size;
b.shader->info.min_subgroup_size = compiler_info->rt_wave_size;
b.shader->info.shared_size = compiler_info->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
struct radv_nir_rt_traversal_params params = {0};
@ -1354,7 +1354,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
params.hit_attrib_size = hit_attrib_size;
params.ignore_cull_mask = false;
struct radv_nir_rt_traversal_result result = radv_build_traversal(device, pipeline, &b, &params, info);
struct radv_nir_rt_traversal_result result = radv_build_traversal(compiler_info, pipeline, &b, &params, info);
b.cursor = nir_before_impl(nir_shader_get_entrypoint(b.shader));
@ -1366,7 +1366,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
}
b.cursor = nir_after_impl(nir_shader_get_entrypoint(b.shader));
radv_nir_lower_rt_storage(b.shader, hit_attrib_derefs, NULL, NULL, pdev->rt_wave_size);
radv_nir_lower_rt_storage(b.shader, hit_attrib_derefs, NULL, NULL, compiler_info->rt_wave_size);
nir_push_if(&b, nir_load_var(&b, result.hit));
{

View file

@ -13,7 +13,8 @@ typedef void (*radv_nir_traversal_preprocess_cb)(nir_shader *nir);
void radv_nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit);
nir_shader *radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
nir_shader *radv_build_traversal_shader(const struct radv_compiler_info *compiler_info,
struct radv_ray_tracing_pipeline *pipeline,
struct radv_ray_tracing_stage_info *info,
radv_nir_traversal_preprocess_cb preprocess, uint32_t payload_size,
uint32_t hit_attrib_size);

View file

@ -41,13 +41,12 @@ radv_calculate_lds_size(const struct radv_shader_info *radv, const enum amd_gfx_
static inline void
radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv_shader_info *radv,
const struct radv_shader_args *radv_args, const struct radv_device_cache_key *radv_key,
const enum amd_gfx_level gfx_level)
const struct radv_shader_args *radv_args, const struct radv_compiler_info *compiler_info)
{
bool ngg_wave_id_en = radv->ngg_wave_id_en;
/* Separately compiled shader, where the next stage might use NGG streamout. */
ngg_wave_id_en |= radv->is_ngg && radv->merged_shader_compiled_separately &&
radv->next_stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX11;
radv->next_stage == MESA_SHADER_GEOMETRY && compiler_info->ac->gfx_level >= GFX11;
ASSIGN_FIELD(wave_size);
ASSIGN_FIELD(workgroup_size);
@ -60,16 +59,17 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
ASSIGN_FIELD(descriptor_heap);
aco_info->vs.any_tcs_inputs_via_lds = radv->vs.tcs_inputs_via_lds != 0;
/* S2 must not be modified for correct hang recovery when NGG_WAVE_ID_EN=1. */
aco_info->vs.preserve_s2 = ngg_wave_id_en && gfx_level < GFX12;
aco_info->vs.preserve_s2 = ngg_wave_id_en && compiler_info->ac->gfx_level < GFX12;
aco_info->ps.spi_ps_input_ena = radv->ps.spi_ps_input_ena;
aco_info->ps.spi_ps_input_addr = radv->ps.spi_ps_input_addr;
aco_info->ps.has_prolog = false;
aco_info->image_2d_view_of_3d = radv_key->image_2d_view_of_3d;
aco_info->image_2d_view_of_3d = compiler_info->image_2d_view_of_3d;
aco_info->epilog_pc = radv_args->epilog_pc;
aco_info->hw_stage = radv_select_hw_stage(radv, gfx_level);
aco_info->hw_stage = radv_select_hw_stage(radv, compiler_info->ac->gfx_level);
aco_info->next_stage_pc = radv_args->next_stage_pc;
aco_info->schedule_ngg_pos_exports = gfx_level < GFX11 && radv->has_ngg_culling && radv->has_ngg_early_prim_export;
aco_info->lds_size = radv_calculate_lds_size(radv, gfx_level);
aco_info->schedule_ngg_pos_exports =
compiler_info->ac->gfx_level < GFX11 && radv->has_ngg_culling && radv->has_ngg_early_prim_export;
aco_info->lds_size = radv_calculate_lds_size(radv, compiler_info->ac->gfx_level);
}
static inline void

View file

@ -6330,7 +6330,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
}
}
struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(device, &state);
struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(&device->compiler_info, &state);
/* Adjust the remapping for alpha-to-coverage without any color attachment and dual-source
* blending to make sure colors written aren't cleared.
@ -12829,7 +12829,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
uint32_t tess_num_patches, tess_lds_size;
radv_get_tess_wg_info(pdev, &tcs->info.tcs.io_info, tcs->info.tcs.tcs_vertices_out,
radv_get_tess_wg_info(&device->compiler_info, &tcs->info.tcs.io_info, tcs->info.tcs.tcs_vertices_out,
d->vk.ts.patch_control_points,
/* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */
vs->info.vs.num_linked_outputs, &tess_num_patches, &tess_lds_size);
@ -13104,10 +13104,10 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg) {
gfx10_ngg_set_esgs_ring_itemsize(&es->info, &gs->info, &gs->info.ngg_info);
gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info);
gfx10_get_ngg_info(&device->compiler_info, &es->info, &gs->info, &gs->info.ngg_info);
radv_precompute_registers_hw_ngg(device, gs);
} else {
radv_get_legacy_gs_info(device, &es->info, &gs->info);
radv_get_legacy_gs_info(&device->compiler_info, &es->info, &gs->info);
radv_precompute_registers_hw_gs(device, &es->info, gs);
cmd_buffer->esgs_ring_size_needed = MAX2(cmd_buffer->esgs_ring_size_needed, gs->regs.gs.esgs_ring_size);

View file

@ -289,7 +289,7 @@ radv_physical_device_init_cache_key(struct radv_physical_device *pdev)
key->clear_lds = instance->drirc.misc.clear_lds;
key->cs_wave32 = pdev->cs_wave_size == 32;
key->disable_aniso_single_level = instance->drirc.debug.disable_aniso_single_level && pdev->info.gfx_level < GFX8;
key->disable_aniso_single_level = instance->drirc.debug.disable_aniso_single_level;
key->disable_shrink_image_store = instance->drirc.debug.disable_shrink_image_store;
key->disable_sinking_load_input_fs = instance->drirc.debug.disable_sinking_load_input_fs;
key->disable_trunc_coord = instance->drirc.debug.disable_trunc_coord;

View file

@ -36,25 +36,16 @@
#include "vk_format.h"
bool
radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2 flags)
radv_pipeline_capture_shaders(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
return (flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) ||
(instance->debug_flags & RADV_DEBUG_DUMP_SHADERS) || device->keep_shader_info;
compiler_info->debug.capture_shaders;
}
bool
radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2 flags)
radv_pipeline_capture_shader_stats(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
/* Capture shader statistics when RGP is enabled to correlate shader hashes with Fossilize. */
return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) ||
(instance->debug_flags & (RADV_DEBUG_DUMP_SHADER_STATS | RADV_DEBUG_PSO_HISTORY)) ||
device->keep_shader_info || (instance->vk.trace_mode & RADV_TRACE_MODE_RGP);
return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) || compiler_info->debug.capture_shader_stats;
}
bool
@ -127,14 +118,13 @@ struct radv_shader_stage_key
radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineShaderStageCreateInfo *stage,
VkPipelineCreateFlags2 flags, const void *pNext)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
const struct radv_compiler_info *compiler_info = &device->compiler_info;
mesa_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
struct vk_pipeline_robustness_state rs;
struct radv_shader_stage_key key = {0};
key.keep_statistic_info = radv_pipeline_capture_shader_stats(device, flags);
key.keep_executable_info = radv_pipeline_capture_shaders(device, flags);
key.keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, flags);
key.keep_executable_info = radv_pipeline_capture_shaders(compiler_info, flags);
if (flags & VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT)
key.optimisations_disabled = 1;
@ -149,12 +139,12 @@ radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineS
key.descriptor_heap = 1;
if (stage->stage & RADV_GRAPHICS_STAGE_BITS) {
key.version = instance->drirc.misc.override_graphics_shader_version;
key.version = compiler_info->override_graphics_shader_version;
} else if (stage->stage & RADV_RT_STAGE_BITS) {
key.version = instance->drirc.misc.override_ray_tracing_shader_version;
key.version = compiler_info->override_ray_tracing_shader_version;
} else {
assert(stage->stage == VK_SHADER_STAGE_COMPUTE_BIT);
key.version = instance->drirc.misc.override_compute_shader_version;
key.version = compiler_info->override_compute_shader_version;
}
vk_pipeline_robustness_state_fill(&device->vk.robustness_state, &rs, pNext, stage->pNext);
@ -262,12 +252,11 @@ non_uniform_access_callback(const nir_src *src, void *_)
}
void
radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
radv_postprocess_nir(const struct radv_compiler_info *compiler_info, const struct radv_graphics_state_key *gfx_state,
struct radv_shader_stage *stage)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const bool use_llvm = pdev->use_llvm;
enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
const bool use_llvm = compiler_info->debug.use_llvm;
bool progress;
/* Wave and workgroup size should already be filled. */
@ -318,7 +307,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(progress, stage->nir, nir_opt_load_store_vectorize, &vectorize_opts);
if (progress) {
NIR_PASS(_, stage->nir, nir_opt_copy_prop);
NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !pdev->cache_key.disable_shrink_image_store);
NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !compiler_info->cache_key->disable_shrink_image_store);
constant_fold_for_push_const = true;
}
@ -357,7 +346,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
&(ac_nir_lower_image_tex_options){
.gfx_level = gfx_level,
.lower_array_layer_round_even =
!pdev->info.compiler_info.conformant_trunc_coord && !pdev->cache_key.disable_trunc_coord,
!compiler_info->ac->conformant_trunc_coord && !compiler_info->cache_key->disable_trunc_coord,
.fix_derivs_in_divergent_cf = stage->stage == MESA_SHADER_FRAGMENT && !use_llvm,
.max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs
});
@ -378,7 +367,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
/* This has to be done after nir_opt_algebraic for best descriptor vectorization, but also before
* NGG culling.
*/
NIR_PASS(_, stage->nir, radv_nir_lower_descriptors, device, stage);
NIR_PASS(_, stage->nir, radv_nir_lower_descriptors, compiler_info, stage);
NIR_PASS(_, stage->nir, nir_lower_alu_width, ac_nir_opt_vectorize_cb, &gfx_level);
@ -395,7 +384,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
NIR_PASS(_, stage->nir, nir_opt_move, sink_opts);
} else {
if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs)
if (stage->stage != MESA_SHADER_FRAGMENT || !compiler_info->cache_key->disable_sinking_load_input_fs)
sink_opts |= nir_move_load_input | nir_move_load_frag_coord;
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
@ -407,13 +396,13 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
* load_input can be reordered, but buffer loads can't.
*/
if (stage->stage == MESA_SHADER_VERTEX) {
NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info);
NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, compiler_info, stage, gfx_state);
}
/* Lower I/O intrinsics to memory instructions. */
bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
bool io_to_mem = radv_nir_lower_io_to_mem(compiler_info, stage);
if (lowered_ngg) {
radv_lower_ngg(device, stage, gfx_state);
radv_lower_ngg(compiler_info, stage, gfx_state);
} else if (is_last_vgt_stage) {
if (stage->stage != MESA_SHADER_GEOMETRY) {
NIR_PASS(_, stage->nir, ac_nir_lower_legacy_vs, gfx_level,
@ -477,7 +466,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, ac_nir_lower_ps_late, &late_options);
}
if (radv_shader_should_clear_lds(device, stage->nir)) {
if (radv_shader_should_clear_lds(compiler_info, stage->nir)) {
const unsigned chunk_size = 16; /* max single store size */
const unsigned shared_size = align(stage->nir->info.shared_size, chunk_size);
NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
@ -504,18 +493,18 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
.allow_fp16 = gfx_level >= GFX9,
});
NIR_PASS(_, stage->nir, ac_nir_lower_intrinsics_to_args, &stage->args.ac,
&(ac_nir_lower_intrinsics_to_args_options){
.gfx_level = gfx_level,
.has_ls_vgpr_init_bug =
pdev->info.compiler_info.has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog,
.hw_stage = radv_select_hw_stage(&stage->info, gfx_level),
.wave_size = stage->info.wave_size,
.workgroup_size = stage->info.workgroup_size,
.use_llvm = use_llvm,
.load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr,
});
NIR_PASS(_, stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi);
NIR_PASS(
_, stage->nir, ac_nir_lower_intrinsics_to_args, &stage->args.ac,
&(ac_nir_lower_intrinsics_to_args_options){
.gfx_level = gfx_level,
.has_ls_vgpr_init_bug = compiler_info->ac->has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog,
.hw_stage = radv_select_hw_stage(&stage->info, gfx_level),
.wave_size = stage->info.wave_size,
.workgroup_size = stage->info.workgroup_size,
.use_llvm = use_llvm,
.load_grid_size_from_user_sgpr = compiler_info->load_grid_size_from_user_sgpr,
});
NIR_PASS(_, stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, compiler_info->hw.address32_hi);
if (!stage->key.optimisations_disabled) {
NIR_PASS(_, stage->nir, nir_opt_dce);
@ -545,7 +534,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
NIR_PASS(_, stage->nir, nir_lower_int64);
if (pdev->cache_key.mitigate_smem_oob)
if (compiler_info->cache_key->mitigate_smem_oob)
NIR_PASS(_, stage->nir, ac_nir_fixup_mem_access_gfx6, &stage->args.ac, 4096, true, true);
bool opt_intrinsics = false;
@ -639,13 +628,11 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
}
bool
radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader)
radv_shader_should_clear_lds(const struct radv_compiler_info *compiler_info, const nir_shader *shader)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH ||
shader->info.stage == MESA_SHADER_TASK) &&
shader->info.shared_size > 0 && pdev->cache_key.clear_lds;
shader->info.shared_size > 0 && compiler_info->cache_key->clear_lds;
}
static uint32_t

View file

@ -26,6 +26,7 @@ struct radv_graphics_state_key;
struct radv_shader_layout;
struct nir_shader;
typedef struct nir_shader nir_shader;
struct radv_compiler_info;
enum radv_pipeline_type {
RADV_PIPELINE_GRAPHICS,
@ -72,9 +73,9 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline, VK_OBJECT_TYPE_P
return (struct radv_##pipe_type##_pipeline *)pipeline; \
}
bool radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2 flags);
bool radv_pipeline_capture_shaders(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags);
bool radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2 flags);
bool radv_pipeline_capture_shader_stats(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags);
bool radv_pipeline_skip_shaders_cache(const struct radv_device *device, const struct radv_pipeline *pipeline);
@ -96,10 +97,10 @@ void radv_shader_layout_init(const struct radv_pipeline_layout *pipeline_layout,
void radv_pipeline_stage_finish(struct radv_shader_stage *stage);
void radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
struct radv_shader_stage *stage);
void radv_postprocess_nir(const struct radv_compiler_info *compiler_info,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stage);
bool radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader);
bool radv_shader_should_clear_lds(const struct radv_compiler_info *compiler_info, const nir_shader *shader);
VkPipelineShaderStageCreateInfo *radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
const VkPipelineShaderStageCreateInfo *pStages,

View file

@ -130,30 +130,16 @@ radv_shader_cache_serialize(struct vk_pipeline_cache_object *object, struct blob
}
static bool
radv_is_cache_disabled(const struct radv_device *device, const struct vk_pipeline_cache *cache)
radv_is_cache_disabled(const struct radv_compiler_info *compiler_info, const struct vk_pipeline_cache *cache)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
/* The buffer address used for debug printf is hardcoded. */
if (device->debug_nir.printf.buffer_addr)
return true;
/* The buffer address used for validating VAs is hardcoded. */
if (device->debug_nir.valid_va.buffer_addr)
return true;
/* Pipeline caches can be disabled with RADV_DEBUG=nocache, with MESA_GLSL_CACHE_DISABLE=1 and
* when ACO_DEBUG is used. MESA_GLSL_CACHE_DISABLE is done elsewhere.
*/
if ((instance->debug_flags & RADV_DEBUG_NO_CACHE) || (pdev->use_llvm ? 0 : aco_get_codegen_flags()))
if (compiler_info->cache_disabled)
return true;
if (!cache) {
/* When the application doesn't provide a pipeline cache and the in-memory cache is also
* disabled.
*/
cache = device->mem_cache;
cache = compiler_info->mem_cache;
if (!cache)
return true;
}
@ -165,7 +151,7 @@ struct radv_shader *
radv_shader_create(struct radv_device *device, struct vk_pipeline_cache *cache, const struct radv_shader_binary *binary,
bool skip_cache, struct radv_shader_debug_info *dbg)
{
if (radv_is_cache_disabled(device, cache) || skip_cache || (dbg && dbg->dump_shader)) {
if (radv_is_cache_disabled(&device->compiler_info, cache) || skip_cache || (dbg && dbg->dump_shader)) {
struct radv_shader *shader;
radv_shader_create_uncached(device, binary, false, NULL, dbg, &shader);
return shader;
@ -339,7 +325,7 @@ radv_pipeline_cache_object_search(struct radv_device *device, struct vk_pipeline
{
*found_in_application_cache = false;
if (radv_is_cache_disabled(device, cache))
if (radv_is_cache_disabled(&device->compiler_info, cache))
return NULL;
bool *found = found_in_application_cache;
@ -404,7 +390,7 @@ radv_compute_pipeline_cache_search(struct radv_device *device, struct vk_pipelin
void
radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_pipeline *pipeline)
{
if (radv_is_cache_disabled(device, cache))
if (radv_is_cache_disabled(&device->compiler_info, cache))
return;
if (!cache)
@ -490,7 +476,7 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip
pipeline->stages[i].shader = radv_shader_ref(pipeline_obj->shaders[idx++]);
if (pipeline->stages[i].needs_nir) {
pipeline->stages[i].nir = radv_pipeline_cache_lookup_nir_handle(device, cache, pipeline->stages[i].blake3);
pipeline->stages[i].nir = radv_pipeline_cache_lookup_nir_handle(&device->compiler_info, cache, pipeline->stages[i].blake3);
complete &= pipeline->stages[i].nir != NULL;
}
}
@ -510,7 +496,7 @@ radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pip
struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages,
unsigned num_groups)
{
if (radv_is_cache_disabled(device, cache))
if (radv_is_cache_disabled(&device->compiler_info, cache))
return;
if (!cache)
@ -574,49 +560,48 @@ radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pip
}
nir_shader *
radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, mesa_shader_stage stage,
const blake3_hash key)
radv_pipeline_cache_lookup_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
mesa_shader_stage stage, const blake3_hash key)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (radv_is_cache_disabled(device, cache))
if (radv_is_cache_disabled(compiler_info, cache))
return NULL;
if (!cache)
cache = device->mem_cache;
cache = compiler_info->mem_cache;
return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &pdev->nir_options[stage], NULL, NULL);
return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &compiler_info->nir_options[stage], NULL, NULL);
}
void
radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key,
const nir_shader *nir)
radv_pipeline_cache_insert_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
const blake3_hash key, const nir_shader *nir)
{
if (radv_is_cache_disabled(device, cache))
if (radv_is_cache_disabled(compiler_info, cache))
return;
if (!cache)
cache = device->mem_cache;
cache = compiler_info->mem_cache;
vk_pipeline_cache_add_nir(cache, key, sizeof(blake3_hash), nir);
}
struct vk_pipeline_cache_object *
radv_pipeline_cache_lookup_nir_handle(struct radv_device *device, struct vk_pipeline_cache *cache, const uint8_t *blake3)
radv_pipeline_cache_lookup_nir_handle(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
const uint8_t *blake3)
{
if (radv_is_cache_disabled(device, cache))
if (radv_is_cache_disabled(compiler_info, cache))
return NULL;
if (!cache)
cache = device->mem_cache;
cache = compiler_info->mem_cache;
return vk_pipeline_cache_lookup_object(cache, blake3, BLAKE3_KEY_LEN, &vk_raw_data_cache_object_ops, NULL);
}
struct nir_shader *
radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object)
radv_pipeline_cache_handle_to_nir(const struct radv_compiler_info *compiler_info,
struct vk_pipeline_cache_object *object)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct blob_reader blob;
struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base);
blob_reader_init(&blob, nir_object->data, nir_object->data_size);
@ -626,7 +611,7 @@ radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline
ralloc_free(nir);
return NULL;
}
nir->options = &pdev->nir_options[nir->info.stage];
nir->options = &compiler_info->nir_options[nir->info.stage];
return nir;
}
@ -652,7 +637,7 @@ radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline
blob_finish_get_buffer(&blob, &data, &size);
struct vk_pipeline_cache_object *object;
if (cached && !radv_is_cache_disabled(device, cache)) {
if (cached && !radv_is_cache_disabled(&device->compiler_info, cache)) {
object = vk_pipeline_cache_create_and_insert_object(cache, blake3, BLAKE3_KEY_LEN, data, size,
&vk_raw_data_cache_object_ops);
} else {
@ -675,7 +660,7 @@ radv_pipeline_cache_get_binaries(struct radv_device *device, const VkAllocationC
*found_in_internal_cache = false;
if (radv_is_cache_disabled(device, cache))
if (radv_is_cache_disabled(&device->compiler_info, cache))
return VK_SUCCESS;
struct vk_pipeline_cache_object *object =
@ -711,7 +696,7 @@ radv_pipeline_cache_get_binaries(struct radv_device *device, const VkAllocationC
shader = pipeline_obj->shaders[idx++];
if (data->is_library)
nir = radv_pipeline_cache_lookup_nir_handle(device, cache, data->stages[i].blake3);
nir = radv_pipeline_cache_lookup_nir_handle(&device->compiler_info, cache, data->stages[i].blake3);
result = radv_create_pipeline_binary_from_rt_shader(device, pAllocator, shader, false, data->stages[i].blake3,
&stage_data->info, stage_data->stack_size, nir,

View file

@ -27,6 +27,7 @@ struct radv_shader_binary;
struct radv_shader_debug_info;
struct radv_shader_stage;
struct radv_spirv_to_nir_options;
struct radv_compiler_info;
struct util_dynarray;
struct nir_shader;
typedef struct nir_shader nir_shader;
@ -55,17 +56,18 @@ void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct v
struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages,
unsigned num_groups);
nir_shader *radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
mesa_shader_stage stage, const blake3_hash key);
nir_shader *radv_pipeline_cache_lookup_nir(const struct radv_compiler_info *compiler_info,
struct vk_pipeline_cache *cache, mesa_shader_stage stage,
const blake3_hash key);
void radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key,
const nir_shader *nir);
void radv_pipeline_cache_insert_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
const blake3_hash key, const nir_shader *nir);
struct vk_pipeline_cache_object *radv_pipeline_cache_lookup_nir_handle(struct radv_device *device,
struct vk_pipeline_cache_object *radv_pipeline_cache_lookup_nir_handle(const struct radv_compiler_info *compiler_info,
struct vk_pipeline_cache *cache,
const unsigned char *blake3);
struct nir_shader *radv_pipeline_cache_handle_to_nir(struct radv_device *device,
struct nir_shader *radv_pipeline_cache_handle_to_nir(const struct radv_compiler_info *compiler_info,
struct vk_pipeline_cache_object *object);
struct vk_pipeline_cache_object *radv_pipeline_cache_nir_to_handle(struct radv_device *device,

View file

@ -98,35 +98,35 @@ radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct
}
struct radv_shader_binary *
radv_compile_cs(struct radv_device *device, struct radv_shader_stage *cs_stage, bool keep_executable_info,
bool keep_statistic_info, bool is_internal, struct radv_shader_debug_info *dbg)
radv_compile_cs(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *cs_stage,
bool keep_executable_info, bool keep_statistic_info, bool is_internal,
struct radv_shader_debug_info *dbg)
{
struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
/* Compile SPIR-V shader to NIR. */
cs_stage->nir = radv_shader_spirv_to_nir(device, cs_stage, NULL, is_internal);
cs_stage->nir = radv_shader_spirv_to_nir(compiler_info, cs_stage, NULL, is_internal);
radv_optimize_nir(cs_stage->nir, cs_stage->key.optimisations_disabled);
/* Run the shader info pass. */
radv_nir_shader_info_init(cs_stage->stage, MESA_SHADER_NONE, &cs_stage->info);
radv_nir_shader_info_pass(device, cs_stage->nir, &cs_stage->layout, &cs_stage->key, NULL, RADV_PIPELINE_COMPUTE,
false, &cs_stage->info);
radv_nir_shader_info_pass(compiler_info, cs_stage->nir, &cs_stage->layout, &cs_stage->key, NULL,
RADV_PIPELINE_COMPUTE, false, &cs_stage->info);
radv_declare_shader_args(device, NULL, &cs_stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &cs_stage->args, dbg);
radv_declare_shader_args(compiler_info, NULL, &cs_stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE,
&cs_stage->args, dbg);
cs_stage->info.user_sgprs_locs = cs_stage->args.user_sgprs_locs;
cs_stage->info.inline_push_constant_mask = cs_stage->args.ac.inline_push_const_mask;
/* Postprocess NIR. */
radv_postprocess_nir(device, NULL, cs_stage);
radv_postprocess_nir(compiler_info, NULL, cs_stage);
dbg->dump_shader = radv_can_dump_shader(device, cs_stage->nir);
bool dump_nir = dbg->dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR);
dbg->dump_shader = radv_can_dump_shader(compiler_info, cs_stage->nir);
bool dump_nir = dbg->dump_shader && compiler_info->debug.dump_nir;
if (dbg->dump_shader) {
simple_mtx_lock(&instance->shader_dump_mtx);
simple_mtx_lock(compiler_info->debug.shader_dump_mtx);
if (dump_nir) {
nir_print_shader(cs_stage->nir, stderr);
@ -134,23 +134,23 @@ radv_compile_cs(struct radv_device *device, struct radv_shader_stage *cs_stage,
}
/* Compile NIR shader to AMD assembly. */
struct radv_shader_binary *cs_binary =
radv_shader_nir_to_asm(device, cs_stage, &cs_stage->nir, 1, NULL, keep_executable_info, keep_statistic_info);
struct radv_shader_binary *cs_binary = radv_shader_nir_to_asm(compiler_info, cs_stage, &cs_stage->nir, 1, NULL,
keep_executable_info, keep_statistic_info);
/* Dump NIR after nir_to_asm, because ACO modifies it. */
char *nir_string = NULL;
if (keep_executable_info || dbg->dump_shader)
nir_string = radv_dump_nir_shaders(instance, &cs_stage->nir, 1);
nir_string = radv_dump_nir_shaders(compiler_info, &cs_stage->nir, 1);
radv_parse_binary_debug_info(device, cs_binary, dbg);
radv_parse_binary_debug_info(compiler_info, cs_binary, dbg);
dbg->nir_string = nir_string;
dbg->stages = 1 << MESA_SHADER_COMPUTE;
radv_shader_dump_asm(device, dbg, &cs_stage->info);
radv_shader_dump_asm(compiler_info, dbg, &cs_stage->info);
if (dbg->dump_shader)
simple_mtx_unlock(&instance->shader_dump_mtx);
simple_mtx_unlock(compiler_info->debug.shader_dump_mtx);
if (keep_executable_info && cs_stage->spirv.size) {
dbg->spirv = malloc(cs_stage->spirv.size);
@ -185,8 +185,9 @@ radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, st
struct vk_pipeline_cache *cache, const VkPipelineShaderStageCreateInfo *pStage,
const VkPipelineCreationFeedbackCreateInfo *creation_feedback)
{
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags);
const struct radv_compiler_info *compiler_info = &device->compiler_info;
bool keep_executable_info = radv_pipeline_capture_shaders(compiler_info, pipeline->base.create_flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, pipeline->base.create_flags);
const bool skip_shaders_cache = radv_pipeline_skip_shaders_cache(device, &pipeline->base);
struct radv_shader_stage cs_stage = {0};
VkPipelineCreationFeedback pipeline_feedback = {
@ -220,8 +221,8 @@ radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, st
radv_pipeline_stage_init(pipeline->base.create_flags, pStage, pipeline_layout, &stage_key, &cs_stage);
struct radv_shader_debug_info cs_dbg = {0};
struct radv_shader_binary *cs_binary = radv_compile_cs(device, &cs_stage, keep_executable_info, keep_statistic_info,
pipeline->base.is_internal, &cs_dbg);
struct radv_shader_binary *cs_binary = radv_compile_cs(compiler_info, &cs_stage, keep_executable_info,
keep_statistic_info, pipeline->base.is_internal, &cs_dbg);
pipeline->base.shaders[MESA_SHADER_COMPUTE] =
radv_shader_create(device, cache, cs_binary, skip_shaders_cache, &cs_dbg);
@ -232,7 +233,7 @@ radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, st
}
free(cs_binary);
if (radv_can_dump_shader_stats(device, cs_stage.nir)) {
if (radv_can_dump_shader_stats(&device->compiler_info, cs_stage.nir)) {
radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[MESA_SHADER_COMPUTE], stderr);
}
radv_pipeline_stage_finish(&cs_stage);

View file

@ -42,8 +42,9 @@ void radv_get_compute_shader_metadata(const struct radv_device *device, const st
void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout,
struct radv_shader *shader);
struct radv_shader_binary *radv_compile_cs(struct radv_device *device, struct radv_shader_stage *cs_stage,
bool keep_executable_info, bool keep_statistic_info, bool is_internal,
struct radv_shader_binary *radv_compile_cs(const struct radv_compiler_info *compiler_info,
struct radv_shader_stage *cs_stage, bool keep_executable_info,
bool keep_statistic_info, bool is_internal,
struct radv_shader_debug_info *dbg);
VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,

View file

@ -98,18 +98,17 @@ radv_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor
}
static unsigned
radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable,
radv_choose_spi_color_format(const struct radv_compiler_info *compiler_info, VkFormat vk_format, bool blend_enable,
bool blend_need_alpha)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct util_format_description *desc = radv_format_description(vk_format);
bool use_rbplus = pdev->info.rbplus_allowed;
bool use_rbplus = compiler_info->hw.rbplus_allowed;
struct ac_spi_color_formats formats = {0};
unsigned format, ntype, swap;
format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
format = ac_get_cb_format(compiler_info->ac->gfx_level, desc->format);
ntype = ac_get_cb_number_type(desc->format);
swap = ac_translate_colorswap(pdev->info.gfx_level, desc->format, false);
swap = ac_translate_colorswap(compiler_info->ac->gfx_level, desc->format, false);
ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats);
@ -1464,9 +1463,8 @@ radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages, enum amd_g
}
struct radv_ps_epilog_key
radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
radv_generate_ps_epilog_key(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_state *state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0, no_signed_zero = 0;
struct radv_ps_epilog_key key;
@ -1484,7 +1482,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
} else {
const bool blend_enable = (state->color_blend_enable >> i) & 0x1u;
cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i));
cf = radv_choose_spi_color_format(compiler_info, fmt, blend_enable, state->need_src_alpha & (1 << i));
uint32_t comp_used = util_format_colormask(vk_format_description(fmt));
@ -1530,9 +1528,9 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
state->alpha_to_coverage_via_mrtz);
key.spi_shader_col_format = col_format;
key.color_is_int8 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int8 : 0;
key.color_is_int10 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int10 : 0;
key.enable_mrt_output_nan_fixup = pdev->cache_key.enable_mrt_output_nan_fixup ? is_float32 : 0;
key.color_is_int8 = compiler_info->ac->has_cb_lt16bit_int_clamp_bug ? is_int8 : 0;
key.color_is_int10 = compiler_info->ac->has_cb_lt16bit_int_clamp_bug ? is_int10 : 0;
key.enable_mrt_output_nan_fixup = compiler_info->cache_key->enable_mrt_output_nan_fixup ? is_float32 : 0;
key.no_signed_zero = no_signed_zero;
key.colors_written = state->colors_written;
key.mrt0_is_dual_src = state->mrt0_is_dual_src && key.colors_needed & 0xf;
@ -1547,7 +1545,8 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
}
static struct radv_ps_epilog_key
radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state)
radv_pipeline_generate_ps_epilog_key(const struct radv_compiler_info *compiler_info,
const struct vk_graphics_pipeline_state *state)
{
struct radv_ps_epilog_state ps_epilog = {0};
@ -1605,14 +1604,14 @@ radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const str
ps_epilog.color_attachment_mappings[i] = state->cal ? state->cal->color_map[i] : i;
}
return radv_generate_ps_epilog_key(device, &ps_epilog);
return radv_generate_ps_epilog_key(compiler_info, &ps_epilog);
}
static struct radv_graphics_state_key
radv_generate_graphics_state_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state,
radv_generate_graphics_state_key(const struct radv_compiler_info *compiler_info,
const struct vk_graphics_pipeline_state *state,
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, uint32_t custom_blend_mode)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_graphics_state_key key;
memset(&key, 0, sizeof(key));
@ -1646,7 +1645,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
key.vi.instance_rate_divisors[i] = state->vi->bindings[binding].divisor;
/* vertex_attribute_strides is only needed to workaround GFX6/7 offset>=stride checks. */
if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && pdev->info.gfx_level < GFX8) {
if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && compiler_info->ac->gfx_level < GFX8) {
/* From the Vulkan spec 1.2.157:
*
* "If the bound pipeline state object was created with the
@ -1666,7 +1665,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
}
const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(
pdev->info.gfx_level, pdev->info.compiler_info.has_vtx_format_alpha_adjust_bug, format);
compiler_info->ac->gfx_level, compiler_info->ac->has_vtx_format_alpha_adjust_bug, format);
unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
/* If offset is misaligned, then the buffer offset must be too. Just skip updating
@ -1691,7 +1690,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
* alpha-to-one is enabled (alpha to MRTZ.a and one to MRT0.a).
*/
key.ms.alpha_to_coverage_via_mrtz =
alpha_to_coverage_enabled && (pdev->info.gfx_level >= GFX11 || alpha_to_one_enabled);
alpha_to_coverage_enabled && (compiler_info->ac->gfx_level >= GFX11 || alpha_to_one_enabled);
if (state->ms) {
key.ms.sample_shading_enable = state->ms->sample_shading_enable;
@ -1711,29 +1710,29 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
}
if (state->rs) {
if (pdev->info.gfx_level >= GFX10)
if (compiler_info->ac->gfx_level >= GFX10)
key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_CULL_MODE))
key.rs.cull_mode = state->rs->cull_mode;
}
key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(state);
key.ps.force_vrs_enabled = compiler_info->force_vrs_enabled && !radv_is_static_vrs_enabled(state);
if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) && pdev->info.compiler_info.has_vrs_frag_pos_z_bug)
if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) && compiler_info->ac->has_vrs_frag_pos_z_bug)
key.adjust_frag_coord_z = true;
if (radv_pipeline_needs_ps_epilog(state, lib_flags))
key.ps.has_epilog = true;
key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state);
key.ps.epilog = radv_pipeline_generate_ps_epilog_key(compiler_info, state);
/* Alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also exported.
* Though, when a PS epilog is needed and the MS state is NULL (with dynamic rendering), it's not
* possible to know the info at compile time and MRTZ needs to be exported in the epilog.
*/
if (key.ps.has_epilog) {
if (pdev->info.gfx_level >= GFX11) {
if (compiler_info->ac->gfx_level >= GFX11) {
key.ps.exports_mrtz_via_epilog = alpha_to_coverage_unknown;
} else {
key.ps.exports_mrtz_via_epilog =
@ -1744,7 +1743,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
key.dynamic_rasterization_samples = BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
(!!(state->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);
if (pdev->use_ngg) {
if (compiler_info->use_ngg) {
VkShaderStageFlags ngg_stage;
if (state->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
@ -1767,7 +1766,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
key.enable_remove_point_size = true;
}
if (device->vk.enabled_features.smoothLines) {
if (compiler_info->smooth_lines) {
/* Make the line rasterization mode dynamic for smooth lines to conditionally enable the lowering at draw time.
* This is because it's not possible to know if the graphics pipeline will draw lines at this point and it also
* simplifies the implementation.
@ -1784,7 +1783,8 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
!(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT);
}
key.dcc_decompress_gfx11 = pdev->info.gfx_level >= GFX11 && custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11;
key.dcc_decompress_gfx11 =
compiler_info->ac->gfx_level >= GFX11 && custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11;
return key;
}
@ -1794,6 +1794,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGr
const struct vk_graphics_pipeline_state *state,
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
{
const struct radv_compiler_info *compiler_info = &device->compiler_info;
VkPipelineCreateFlags2 create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
struct radv_graphics_pipeline_key key = {0};
uint32_t custom_blend_mode = 0;
@ -1804,7 +1805,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGr
custom_blend_mode = radv_info->custom_blend_mode;
}
key.gfx_state = radv_generate_graphics_state_key(device, state, lib_flags, custom_blend_mode);
key.gfx_state = radv_generate_graphics_state_key(compiler_info, state, lib_flags, custom_blend_mode);
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i];
@ -1820,12 +1821,10 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGr
}
static void
radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages,
radv_fill_shader_info_ngg(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stages,
VkShaderStageFlagBits active_nir_stages)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (!pdev->cache_key.use_ngg)
if (!compiler_info->cache_key->use_ngg)
return;
if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) {
@ -1836,7 +1835,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *
stages[MESA_SHADER_MESH].info.is_ngg = true;
}
if (pdev->info.gfx_level >= GFX11) {
if (compiler_info->ac->gfx_level >= GFX11) {
if (stages[MESA_SHADER_GEOMETRY].nir)
stages[MESA_SHADER_GEOMETRY].info.is_ngg = true;
} else {
@ -1861,7 +1860,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *
}
if ((last_vgt_stage && last_vgt_stage->nir->xfb_info) ||
(pdev->cache_key.no_ngg_gs && stages[MESA_SHADER_GEOMETRY].nir)) {
(compiler_info->cache_key->no_ngg_gs && stages[MESA_SHADER_GEOMETRY].nir)) {
/* NGG needs to be disabled on GFX10/GFX10.3 when:
* - streamout is used because NGG streamout isn't supported
 * - NGG GS is explicitly disabled to workaround performance issues
@ -1980,7 +1979,7 @@ radv_get_next_stage(mesa_shader_stage stage, VkShaderStageFlagBits active_nir_st
}
static void
radv_fill_shader_info(struct radv_device *device, const enum radv_pipeline_type pipeline_type,
radv_fill_shader_info(const struct radv_compiler_info *compiler_info, const enum radv_pipeline_type pipeline_type,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages,
VkShaderStageFlagBits active_nir_stages)
{
@ -1991,23 +1990,22 @@ radv_fill_shader_info(struct radv_device *device, const enum radv_pipeline_type
consider_force_vrs = radv_consider_force_vrs(gfx_state, &stages[i], &stages[MESA_SHADER_FRAGMENT]);
}
radv_nir_shader_info_pass(device, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state, pipeline_type,
consider_force_vrs, &stages[i].info);
radv_nir_shader_info_pass(compiler_info, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state,
pipeline_type, consider_force_vrs, &stages[i].info);
}
radv_nir_shader_info_link(device, gfx_state, stages);
radv_nir_shader_info_link(compiler_info, gfx_state, stages);
}
static void
radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages,
radv_declare_pipeline_args(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stages,
const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages,
struct radv_shader_debug_info *debug)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
radv_declare_shader_args(compiler_info, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
MESA_SHADER_VERTEX, &stages[MESA_SHADER_TESS_CTRL].args, &debug[MESA_SHADER_TESS_CTRL]);
stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask =
@ -2024,8 +2022,8 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage
if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
mesa_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY, pre_stage,
&stages[MESA_SHADER_GEOMETRY].args, &debug[MESA_SHADER_GEOMETRY]);
radv_declare_shader_args(compiler_info, gfx_state, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY,
pre_stage, &stages[MESA_SHADER_GEOMETRY].args, &debug[MESA_SHADER_GEOMETRY]);
stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask =
stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask;
@ -2038,20 +2036,19 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage
}
u_foreach_bit (i, active_nir_stages) {
radv_declare_shader_args(device, gfx_state, &stages[i].info, i, MESA_SHADER_NONE, &stages[i].args, &debug[i]);
radv_declare_shader_args(compiler_info, gfx_state, &stages[i].info, i, MESA_SHADER_NONE, &stages[i].args,
&debug[i]);
stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs;
stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask;
}
}
static struct radv_shader_binary *
radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache *cache,
radv_create_gs_copy_shader(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
struct radv_shader_stage *gs_stage, const struct radv_graphics_state_key *gfx_state,
bool keep_executable_info, bool keep_statistic_info,
struct radv_shader_debug_info *gs_copy_debug)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
const struct radv_shader_info *gs_info = &gs_stage->info;
nir_shader *nir = gs_stage->gs_copy_shader;
@ -2068,8 +2065,8 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
},
};
radv_nir_shader_info_init(gs_copy_stage.stage, MESA_SHADER_FRAGMENT, &gs_copy_stage.info);
radv_nir_shader_info_pass(device, nir, &gs_stage->layout, &gs_stage->key, gfx_state, RADV_PIPELINE_GRAPHICS, false,
&gs_copy_stage.info);
radv_nir_shader_info_pass(compiler_info, nir, &gs_stage->layout, &gs_stage->key, gfx_state, RADV_PIPELINE_GRAPHICS,
false, &gs_copy_stage.info);
gs_copy_stage.info.wave_size = 64; /* Wave32 not supported. */
gs_copy_stage.info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
gs_copy_stage.info.so = gs_info->so;
@ -2077,59 +2074,56 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
gs_copy_stage.info.force_vrs_per_vertex = gs_info->force_vrs_per_vertex;
gs_copy_stage.info.type = RADV_SHADER_TYPE_GS_COPY;
radv_declare_shader_args(device, gfx_state, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE,
radv_declare_shader_args(compiler_info, gfx_state, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE,
&gs_copy_stage.args, gs_copy_debug);
gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs;
gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask;
NIR_PASS(
_, nir, ac_nir_lower_intrinsics_to_args, &gs_copy_stage.args.ac,
&(ac_nir_lower_intrinsics_to_args_options){.gfx_level = pdev->info.gfx_level,
.has_ls_vgpr_init_bug = pdev->info.compiler_info.has_ls_vgpr_init_bug,
&(ac_nir_lower_intrinsics_to_args_options){.gfx_level = compiler_info->ac->gfx_level,
.has_ls_vgpr_init_bug = compiler_info->ac->has_ls_vgpr_init_bug,
.hw_stage = AC_HW_VERTEX_SHADER,
.wave_size = 64,
.workgroup_size = 64,
.use_llvm = pdev->use_llvm});
NIR_PASS(_, nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi);
.use_llvm = compiler_info->debug.use_llvm});
NIR_PASS(_, nir, radv_nir_lower_abi, compiler_info->ac->gfx_level, &gs_copy_stage, gfx_state, compiler_info->hw.address32_hi);
NIR_PASS(_, nir, ac_nir_lower_global_access);
NIR_PASS(_, nir, nir_lower_int64);
struct radv_graphics_pipeline_key key = {0};
gs_copy_debug->dump_shader = radv_can_dump_shader(device, nir);
gs_copy_debug->dump_shader = radv_can_dump_shader(compiler_info, nir);
if (gs_copy_debug->dump_shader)
simple_mtx_lock(&instance->shader_dump_mtx);
simple_mtx_lock(compiler_info->debug.shader_dump_mtx);
struct radv_shader_binary *gs_copy_binary = radv_shader_nir_to_asm(device, &gs_copy_stage, &nir, 1, &key.gfx_state,
keep_executable_info, keep_statistic_info);
struct radv_shader_binary *gs_copy_binary = radv_shader_nir_to_asm(
compiler_info, &gs_copy_stage, &nir, 1, &key.gfx_state, keep_executable_info, keep_statistic_info);
char *nir_string = NULL;
if (keep_executable_info || gs_copy_debug->dump_shader)
nir_string = radv_dump_nir_shaders(instance, &nir, 1);
nir_string = radv_dump_nir_shaders(compiler_info, &nir, 1);
radv_parse_binary_debug_info(device, gs_copy_binary, gs_copy_debug);
radv_parse_binary_debug_info(compiler_info, gs_copy_binary, gs_copy_debug);
gs_copy_debug->nir_string = nir_string;
gs_copy_debug->stages = 1 << MESA_SHADER_VERTEX;
radv_shader_dump_asm(device, gs_copy_debug, &gs_copy_stage.info);
radv_shader_dump_asm(compiler_info, gs_copy_debug, &gs_copy_stage.info);
if (gs_copy_debug->dump_shader)
simple_mtx_unlock(&instance->shader_dump_mtx);
simple_mtx_unlock(compiler_info->debug.shader_dump_mtx);
return gs_copy_binary;
}
static void
radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
radv_graphics_shaders_nir_to_asm(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
bool keep_executable_info, bool keep_statistic_info,
VkShaderStageFlagBits active_nir_stages, struct radv_shader_debug_info *debug,
struct radv_shader_binary **binaries, struct radv_shader_debug_info *gs_copy_debug,
struct radv_shader_binary **gs_copy_binary)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
if (!(active_nir_stages & (1 << s)))
continue;
@ -2138,7 +2132,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
unsigned shader_count = 1;
/* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
if (pdev->info.gfx_level >= GFX9 &&
if (compiler_info->ac->gfx_level >= GFX9 &&
((s == MESA_SHADER_GEOMETRY &&
(active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) ||
(s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) {
@ -2158,12 +2152,12 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
int64_t stage_start = os_time_get_nano();
for (unsigned i = 0; i < shader_count; ++i)
debug[s].dump_shader |= radv_can_dump_shader(device, nir_shaders[i]);
debug[s].dump_shader |= radv_can_dump_shader(compiler_info, nir_shaders[i]);
bool dump_nir = debug[s].dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR);
bool dump_nir = debug[s].dump_shader && compiler_info->debug.dump_nir;
if (debug[s].dump_shader) {
simple_mtx_lock(&instance->shader_dump_mtx);
simple_mtx_lock(compiler_info->debug.shader_dump_mtx);
if (dump_nir) {
for (uint32_t i = 0; i < shader_count; i++)
@ -2171,23 +2165,23 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
}
}
binaries[s] = radv_shader_nir_to_asm(device, &stages[s], nir_shaders, shader_count, gfx_state,
binaries[s] = radv_shader_nir_to_asm(compiler_info, &stages[s], nir_shaders, shader_count, gfx_state,
keep_executable_info, keep_statistic_info);
/* Dump NIR after nir_to_asm, because ACO modifies it. */
char *nir_string = NULL;
if (keep_executable_info || debug[s].dump_shader)
nir_string = radv_dump_nir_shaders(instance, nir_shaders, shader_count);
nir_string = radv_dump_nir_shaders(compiler_info, nir_shaders, shader_count);
radv_parse_binary_debug_info(device, binaries[s], &debug[s]);
radv_parse_binary_debug_info(compiler_info, binaries[s], &debug[s]);
debug[s].nir_string = nir_string;
for (uint32_t i = 0; i < shader_count; i++)
debug[s].stages |= 1 << nir_shaders[i]->info.stage;
radv_shader_dump_asm(device, &debug[s], &stages[s].info);
radv_shader_dump_asm(compiler_info, &debug[s], &stages[s].info);
if (debug[s].dump_shader)
simple_mtx_unlock(&instance->shader_dump_mtx);
simple_mtx_unlock(compiler_info->debug.shader_dump_mtx);
if (keep_executable_info && stages[s].spirv.size) {
debug[s].spirv = malloc(stages[s].spirv.size);
@ -2196,7 +2190,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
}
if (s == MESA_SHADER_GEOMETRY && !stages[s].info.is_ngg) {
*gs_copy_binary = radv_create_gs_copy_shader(device, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state,
*gs_copy_binary = radv_create_gs_copy_shader(compiler_info, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state,
keep_executable_info, keep_statistic_info, gs_copy_debug);
}
@ -2402,16 +2396,15 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const VkGr
}
void
radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache,
radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
bool keep_executable_info, bool keep_statistic_info, bool is_internal,
struct radv_retained_shaders *retained_shaders, bool noop_fs,
struct radv_shader_debug_info *debug, struct radv_shader_binary **binaries,
struct radv_shader_debug_info *gs_copy_debug, struct radv_shader_binary **gs_copy_binary)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
const bool nir_cache = instance->perftest_flags & RADV_PERFTEST_NIR_CACHE;
const bool nir_cache = compiler_info->enable_nir_cache;
for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
if (stages[s].stage == MESA_SHADER_NONE)
continue;
@ -2428,12 +2421,12 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
if (nir_cache) {
radv_hash_graphics_spirv_to_nir(key, &stages[s], &options);
stages[s].nir = radv_pipeline_cache_lookup_nir(device, cache, s, key);
stages[s].nir = radv_pipeline_cache_lookup_nir(compiler_info, cache, s, key);
}
if (!stages[s].nir) {
stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], &options, is_internal);
stages[s].nir = radv_shader_spirv_to_nir(compiler_info, &stages[s], &options, is_internal);
if (nir_cache)
radv_pipeline_cache_insert_nir(device, cache, key, stages[s].nir);
radv_pipeline_cache_insert_nir(compiler_info, cache, key, stages[s].nir);
}
}
@ -2450,7 +2443,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
active_nir_stages |= mesa_to_vk_shader_stage(i);
}
if (!pdev->info.mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
if (!compiler_info->hw.mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
nir_shader *task = stages[MESA_SHADER_TASK].nir;
@ -2482,7 +2475,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
}
/* Determine if shaders uses NGG before linking because it's needed for some NIR pass. */
radv_fill_shader_info_ngg(device, stages, active_nir_stages);
radv_fill_shader_info_ngg(compiler_info, stages, active_nir_stages);
if (stages[MESA_SHADER_GEOMETRY].nir) {
unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
@ -2624,7 +2617,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
}
/* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */
radv_graphics_shaders_link_varyings(stages, pdev->info.gfx_level);
radv_graphics_shaders_link_varyings(stages, compiler_info->ac->gfx_level);
/* Optimize constant clip/cull distance after linking to operate on scalar io in the last
* pre raster stage.
@ -2642,14 +2635,14 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
radv_fill_shader_info(device, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
radv_fill_shader_info(compiler_info, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
radv_declare_pipeline_args(device, stages, gfx_state, active_nir_stages, debug);
radv_declare_pipeline_args(compiler_info, stages, gfx_state, active_nir_stages, debug);
radv_foreach_stage (i, active_nir_stages) {
int64_t stage_start = os_time_get_nano();
radv_postprocess_nir(device, gfx_state, &stages[i]);
radv_postprocess_nir(compiler_info, gfx_state, &stages[i]);
stages[i].feedback.duration += os_time_get_nano() - stage_start;
}
@ -2661,16 +2654,16 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
struct radv_shader_stage *stage = gs_stage ? gs_stage : es_stage;
if ((gs_stage ? gs_stage : es_stage)->info.is_ngg) {
gfx10_get_ngg_info(device, &es_stage->info, gs_stage ? &gs_stage->info : NULL, &stage->info.ngg_info);
gfx10_get_ngg_info(compiler_info, &es_stage->info, gs_stage ? &gs_stage->info : NULL, &stage->info.ngg_info);
stage->info.nir_shared_size = stage->info.ngg_info.lds_size;
}
}
if (stages[MESA_SHADER_GEOMETRY].nir && !stages[MESA_SHADER_GEOMETRY].info.is_ngg)
radv_get_legacy_gs_info(device, NULL, &stages[MESA_SHADER_GEOMETRY].info);
radv_get_legacy_gs_info(compiler_info, NULL, &stages[MESA_SHADER_GEOMETRY].info);
/* Compile NIR shaders to AMD assembly. */
radv_graphics_shaders_nir_to_asm(device, cache, stages, gfx_state, keep_executable_info, keep_statistic_info,
radv_graphics_shaders_nir_to_asm(compiler_info, cache, stages, gfx_state, keep_executable_info, keep_statistic_info,
active_nir_stages, debug, binaries, gs_copy_debug, gs_copy_binary);
}
@ -2835,10 +2828,11 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const Vk
const struct radv_graphics_pipeline_state *gfx_state, struct radv_device *device,
struct vk_pipeline_cache *cache, bool fast_linking_enabled)
{
const struct radv_compiler_info *compiler_info = &device->compiler_info;
struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
struct radv_shader_binary *gs_copy_binary = NULL;
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags);
bool keep_executable_info = radv_pipeline_capture_shaders(compiler_info, pipeline->base.create_flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, pipeline->base.create_flags);
bool skip_shaders_cache = radv_pipeline_skip_shaders_cache(device, &pipeline->base);
struct radv_shader_stage *stages = gfx_state->stages;
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
@ -2918,7 +2912,7 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const Vk
struct radv_shader_debug_info debug[MESA_VULKAN_SHADER_STAGES] = {0};
struct radv_shader_debug_info gs_copy_debug = {0};
radv_graphics_shaders_compile(device, cache, stages, &gfx_state->key.gfx_state, keep_executable_info,
radv_graphics_shaders_compile(compiler_info, cache, stages, &gfx_state->key.gfx_state, keep_executable_info,
keep_statistic_info, pipeline->base.is_internal, retained_shaders, noop_fs, debug,
binaries, &gs_copy_debug, &gs_copy_binary);
radv_graphics_shaders_create(device, cache, skip_shaders_cache, pipeline->base.shaders, binaries, debug,
@ -2932,7 +2926,7 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const Vk
for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
free(binaries[i]);
if (stages[i].nir) {
if (radv_can_dump_shader_stats(device, stages[i].nir) && pipeline->base.shaders[i]) {
if (radv_can_dump_shader_stats(&device->compiler_info, stages[i].nir) && pipeline->base.shaders[i]) {
radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[i], stderr);
}
}

View file

@ -651,10 +651,10 @@ struct radv_ps_epilog_state {
uint8_t need_src_alpha;
};
struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device *device,
struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_compiler_info *compiler_info,
const struct radv_ps_epilog_state *state);
void radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache,
void radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
bool keep_executable_info, bool keep_statistic_info, bool is_internal,
struct radv_retained_shaders *retained_shaders, bool noop_fs,

View file

@ -374,16 +374,13 @@ move_rt_instructions(nir_shader *shader)
}
static void
radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
radv_rt_nir_to_asm(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline,
enum radv_rt_lowering_mode mode, struct radv_shader_stage *stage, uint32_t *payload_size,
uint32_t *hit_attrib_size, struct radv_ray_tracing_stage_info *stage_info,
const struct radv_ray_tracing_stage_info *traversal_stage_info, bool has_position_fetch,
struct radv_shader_binary **binary, bool keep_executable_info, bool keep_statistic_info,
struct radv_shader_debug_info *debug)
{
struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_instance *instance = radv_physical_device_instance(pdev);
switch (mode) {
case RADV_RT_LOWERING_MODE_MONOLITHIC:
radv_nir_lower_rt_io_monolithic(stage->nir);
@ -399,11 +396,11 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
/* Gather shader info. */
nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
radv_nir_shader_info_init(stage->stage, MESA_SHADER_NONE, &stage->info);
radv_nir_shader_info_pass(device, stage->nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING, false,
&stage->info);
radv_nir_shader_info_pass(compiler_info, stage->nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING,
false, &stage->info);
/* Declare shader arguments. */
radv_declare_shader_args(device, NULL, &stage->info, stage->stage, MESA_SHADER_NONE, &stage->args, debug);
radv_declare_shader_args(compiler_info, NULL, &stage->info, stage->stage, MESA_SHADER_NONE, &stage->args, debug);
stage->info.user_sgprs_locs = stage->args.user_sgprs_locs;
stage->info.inline_push_constant_mask = stage->args.ac.inline_push_const_mask;
@ -427,7 +424,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
.stack_alignment = 16,
.localized_loads = true,
.vectorizer_callback = ac_nir_mem_vectorize_callback,
.vectorizer_data = &(struct ac_nir_config){pdev->info.gfx_level, !pdev->use_llvm},
.vectorizer_data = &(struct ac_nir_config){compiler_info->ac->gfx_level, !compiler_info->debug.use_llvm},
};
nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, mem_ctx);
}
@ -449,27 +446,27 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
switch (mode) {
case RADV_RT_LOWERING_MODE_MONOLITHIC:
assert(num_shaders == 1);
radv_nir_lower_rt_abi_monolithic(temp_stage.nir, device, pipeline);
radv_nir_lower_rt_abi_monolithic(temp_stage.nir, compiler_info, pipeline);
break;
case RADV_RT_LOWERING_MODE_CPS:
radv_nir_lower_rt_abi_cps(temp_stage.nir, &stage->info, i > 0, device, pipeline, has_position_fetch,
radv_nir_lower_rt_abi_cps(temp_stage.nir, &stage->info, i > 0, compiler_info, pipeline, has_position_fetch,
traversal_stage_info);
break;
case RADV_RT_LOWERING_MODE_FUNCTION_CALLS:
assert(num_shaders == 1);
radv_nir_lower_rt_abi_functions(temp_stage.nir, &temp_stage.info, *payload_size, *hit_attrib_size, device,
pipeline);
radv_nir_lower_rt_abi_functions(temp_stage.nir, &temp_stage.info, *payload_size, *hit_attrib_size,
compiler_info, pipeline);
break;
}
/* Info might be out-of-date after inlining in radv_nir_lower_rt_abi(). */
nir_shader_gather_info(temp_stage.nir, radv_get_rt_shader_entrypoint(temp_stage.nir));
radv_nir_shader_info_pass(device, temp_stage.nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING,
false, &stage->info);
radv_nir_shader_info_pass(compiler_info, temp_stage.nir, &stage->layout, &stage->key, NULL,
RADV_PIPELINE_RAY_TRACING, false, &stage->info);
radv_optimize_nir(temp_stage.nir, temp_stage.key.optimisations_disabled);
radv_postprocess_nir(device, NULL, &temp_stage);
radv_postprocess_nir(compiler_info, NULL, &temp_stage);
NIR_PASS(_, stage->nir, radv_nir_lower_call_abi, stage->info.wave_size);
NIR_PASS(_, stage->nir, nir_lower_global_vars_to_local);
@ -484,11 +481,11 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
radv_gather_unused_args(stage_info, temp_stage.nir);
}
debug->dump_shader = radv_can_dump_shader(device, stage->nir);
bool dump_nir = debug->dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR);
debug->dump_shader = radv_can_dump_shader(compiler_info, stage->nir);
bool dump_nir = debug->dump_shader && compiler_info->debug.dump_nir;
if (debug->dump_shader) {
simple_mtx_lock(&instance->shader_dump_mtx);
simple_mtx_lock(compiler_info->debug.shader_dump_mtx);
if (dump_nir) {
for (uint32_t i = 0; i < num_shaders; i++)
@ -497,17 +494,17 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
}
/* Compile NIR shader to AMD assembly. */
*binary =
radv_shader_nir_to_asm(device, stage, shaders, num_shaders, NULL, keep_executable_info, keep_statistic_info);
*binary = radv_shader_nir_to_asm(compiler_info, stage, shaders, num_shaders, NULL, keep_executable_info,
keep_statistic_info);
/* Dump NIR after nir_to_asm, because ACO modifies it. */
if (keep_executable_info || debug->dump_shader)
debug->nir_string = radv_dump_nir_shaders(instance, shaders, num_shaders);
debug->nir_string = radv_dump_nir_shaders(compiler_info, shaders, num_shaders);
radv_parse_binary_debug_info(device, *binary, debug);
radv_parse_binary_debug_info(compiler_info, *binary, debug);
debug->stages = 1 << shaders[0]->info.stage;
radv_shader_dump_asm(device, debug, &stage->info);
radv_shader_dump_asm(compiler_info, debug, &stage->info);
if (keep_executable_info && stage->spirv.size) {
debug->spirv = malloc(stage->spirv.size);
@ -516,7 +513,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
}
if (debug->dump_shader)
simple_mtx_unlock(&instance->shader_dump_mtx);
simple_mtx_unlock(compiler_info->debug.shader_dump_mtx);
ralloc_free(mem_ctx);
}
@ -530,16 +527,18 @@ radv_rt_compile_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
struct radv_serialized_shader_arena_block *replay_block, bool skip_shaders_cache,
bool has_position_fetch, struct radv_shader **out_shader)
{
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags);
const struct radv_compiler_info *compiler_info = &device->compiler_info;
bool keep_executable_info = radv_pipeline_capture_shaders(compiler_info, pipeline->base.base.create_flags);
bool keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, pipeline->base.base.create_flags);
bool replayable = (pipeline->base.base.create_flags &
VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR) &&
!radv_is_traversal_shader(stage->nir);
struct radv_shader_binary *binary;
struct radv_shader_debug_info debug = {0};
radv_rt_nir_to_asm(device, pipeline, mode, stage, payload_size, hit_attrib_size, stage_info, traversal_stage_info,
has_position_fetch, &binary, keep_executable_info, keep_statistic_info, &debug);
radv_rt_nir_to_asm(compiler_info, pipeline, mode, stage, payload_size, hit_attrib_size, stage_info,
traversal_stage_info, has_position_fetch, &binary, keep_executable_info, keep_statistic_info,
&debug);
struct radv_shader *shader;
if (replay_block || replayable) {
@ -559,7 +558,7 @@ radv_rt_compile_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
*out_shader = shader;
if (radv_can_dump_shader_stats(device, stage->nir))
if (radv_can_dump_shader_stats(compiler_info, stage->nir))
radv_dump_shader_stats(device, &pipeline->base.base, shader, stderr);
return shader ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY;
@ -653,10 +652,10 @@ radv_ray_tracing_stage_nir_always_needed(struct radv_ray_tracing_stage *stage)
}
static void
radv_rt_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *stage, uint32_t *payload_size,
uint32_t *hit_attrib_size, struct radv_ray_tracing_stage_info *info)
radv_rt_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage,
uint32_t *payload_size, uint32_t *hit_attrib_size, struct radv_ray_tracing_stage_info *info)
{
stage->nir = radv_shader_spirv_to_nir(device, stage, NULL, false);
stage->nir = radv_shader_spirv_to_nir(compiler_info, stage, NULL, false);
nir_foreach_variable_with_modes (var, stage->nir, nir_var_ray_hit_attrib) {
unsigned size, alignment;
@ -729,7 +728,7 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
&stage_keys[s], stage);
/* precompile the shader */
radv_rt_spirv_to_nir(device, stage, &payload_size, &hit_attrib_size, &rt_stages[i].info);
radv_rt_spirv_to_nir(&device->compiler_info, stage, &payload_size, &hit_attrib_size, &rt_stages[i].info);
stage->feedback.duration = os_time_get_nano() - stage_start;
}
@ -854,12 +853,13 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
if (ahit_idx == VK_SHADER_UNUSED_KHR && isec_idx == VK_SHADER_UNUSED_KHR)
continue;
const struct radv_compiler_info *compiler_info = &device->compiler_info;
nir_shader *ahit = NULL;
nir_shader *isec = NULL;
if (ahit_idx != VK_SHADER_UNUSED_KHR)
ahit = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[ahit_idx].nir);
ahit = radv_pipeline_cache_handle_to_nir(compiler_info, pipeline->stages[ahit_idx].nir);
if (isec_idx != VK_SHADER_UNUSED_KHR)
isec = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[isec_idx].nir);
isec = radv_pipeline_cache_handle_to_nir(compiler_info, pipeline->stages[isec_idx].nir);
nir_shader *final_shader;
if (isec) {
@ -934,8 +934,8 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
preprocess = NULL;
/* create traversal shader */
nir_shader *traversal_nir =
radv_build_traversal_shader(device, pipeline, &traversal_info, preprocess, payload_size, hit_attrib_size);
nir_shader *traversal_nir = radv_build_traversal_shader(&device->compiler_info, pipeline, &traversal_info,
preprocess, payload_size, hit_attrib_size);
struct radv_shader_stage traversal_stage = {
.stage = MESA_SHADER_INTERSECTION,
.nir = traversal_nir,
@ -1049,10 +1049,10 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
struct radv_shader_stage prolog_stage = {0};
struct radv_shader_debug_info debug = {0};
radv_build_rt_prolog(device, &prolog_stage, uses_descriptor_heap, &debug);
radv_build_rt_prolog(&device->compiler_info, &prolog_stage, uses_descriptor_heap, &debug);
prolog_stage.nir->options = &pdev->nir_options[MESA_SHADER_COMPUTE];
radv_optimize_nir(prolog_stage.nir, false);
radv_postprocess_nir(device, NULL, &prolog_stage);
radv_postprocess_nir(&device->compiler_info, NULL, &prolog_stage);
NIR_PASS(_, prolog_stage.nir, radv_nir_lower_call_abi, prolog_stage.info.wave_size);
NIR_PASS(_, prolog_stage.nir, nir_lower_global_vars_to_local);

View file

@ -132,10 +132,10 @@ radv_tex_filter_mode(VkSamplerReductionMode mode)
}
static uint32_t
radv_get_max_anisotropy(const struct radv_device *device, const struct vk_sampler_state *sampler_state)
radv_get_max_anisotropy(const struct radv_compiler_info *compiler_info, const struct vk_sampler_state *sampler_state)
{
if (device->force_aniso >= 0)
return device->force_aniso;
if (compiler_info->force_aniso >= 0)
return compiler_info->force_aniso;
if (sampler_state->anisotropy_enable && sampler_state->max_anisotropy > 1.0f)
return (uint32_t)sampler_state->max_anisotropy;
@ -199,19 +199,17 @@ radv_unregister_border_color(struct radv_device *device, uint32_t index)
}
void
radv_make_sampler_descriptor(const struct radv_device *device, const struct vk_sampler_state *sampler_state,
uint32_t *desc)
radv_make_sampler_descriptor(const struct radv_compiler_info *compiler_info,
const struct vk_sampler_state *sampler_state, uint32_t *desc)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const struct radv_instance *instance = radv_physical_device_instance(pdev);
const uint32_t max_aniso = radv_get_max_anisotropy(device, sampler_state);
const uint32_t max_aniso = radv_get_max_anisotropy(compiler_info, sampler_state);
const uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
const unsigned filter_mode = radv_tex_filter_mode(sampler_state->reduction_mode);
unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
const bool trunc_coord =
((sampler_state->min_filter == VK_FILTER_NEAREST && sampler_state->mag_filter == VK_FILTER_NEAREST) ||
pdev->info.compiler_info.conformant_trunc_coord) &&
!instance->drirc.debug.disable_trunc_coord;
compiler_info->ac->conformant_trunc_coord) &&
!compiler_info->cache_key->disable_trunc_coord;
const VkBorderColor border_color = radv_get_border_color(sampler_state);
const bool disable_cube_wrap = sampler_state->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;
@ -238,12 +236,12 @@ radv_make_sampler_descriptor(const struct radv_device *device, const struct vk_s
.min_lod = sampler_state->min_lod,
.max_lod = sampler_state->max_lod,
.lod_bias = sampler_state->mip_lod_bias,
.aniso_single_level = !instance->drirc.debug.disable_aniso_single_level,
.aniso_single_level = !compiler_info->cache_key->disable_aniso_single_level,
.border_color_type = radv_tex_bordercolor(border_color),
.border_color_ptr = border_color_ptr,
};
ac_build_sampler_descriptor(pdev->info.gfx_level, &ac_state, desc);
ac_build_sampler_descriptor(compiler_info->ac->gfx_level, &ac_state, desc);
}
VkResult
@ -285,7 +283,7 @@ radv_sampler_init(struct radv_device *device, struct radv_sampler *sampler, cons
}
}
radv_make_sampler_descriptor(device, &sampler_state, sampler->state);
radv_make_sampler_descriptor(&device->compiler_info, &sampler_state, sampler->state);
return VK_SUCCESS;
}

View file

@ -14,6 +14,7 @@
#include "vk_sampler.h"
struct radv_device;
struct radv_compiler_info;
struct radv_sampler {
struct vk_sampler vk;
@ -28,7 +29,7 @@ VkResult radv_sampler_init(struct radv_device *device, struct radv_sampler *samp
const VkSamplerCreateInfo *pCreateInfo);
void radv_sampler_finish(struct radv_device *device, struct radv_sampler *sampler);
void radv_make_sampler_descriptor(const struct radv_device *device, const struct vk_sampler_state *sampler_state,
uint32_t *desc);
void radv_make_sampler_descriptor(const struct radv_compiler_info *compiler_info,
const struct vk_sampler_state *sampler_state, uint32_t *desc);
#endif /* RADV_SAMPLER_H */

File diff suppressed because it is too large Load diff

View file

@ -607,7 +607,7 @@ void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets, bool opt_
struct radv_shader_stage;
nir_shader *radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *stage,
nir_shader *radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage,
const struct radv_spirv_to_nir_options *options, bool is_internal);
void radv_init_shader_arenas(struct radv_device *device);
@ -617,23 +617,22 @@ void radv_destroy_shader_upload_queue(struct radv_device *device);
struct radv_shader_args;
VkResult radv_parse_binary_debug_info(struct radv_device *device, const struct radv_shader_binary *binary,
VkResult radv_parse_binary_debug_info(const struct radv_compiler_info *compiler_info, const struct radv_shader_binary *binary,
struct radv_shader_debug_info *dbg);
VkResult radv_shader_create_uncached(struct radv_device *device, const struct radv_shader_binary *binary,
bool replayable, struct radv_serialized_shader_arena_block *replay_block,
struct radv_shader_debug_info *dbg, struct radv_shader **out_shader);
struct radv_shader_binary *radv_shader_nir_to_asm(struct radv_device *device, struct radv_shader_stage *pl_stage,
struct nir_shader *const *shaders, int shader_count,
const struct radv_graphics_state_key *gfx_state,
struct radv_shader_binary *radv_shader_nir_to_asm(const struct radv_compiler_info *compiler_info,
struct radv_shader_stage *pl_stage, struct nir_shader *const *shaders,
int shader_count, const struct radv_graphics_state_key *gfx_state,
bool keep_shader_info, bool keep_statistic_info);
void radv_shader_dump_asm(struct radv_device *device, const struct radv_shader_debug_info *debug,
void radv_shader_dump_asm(const struct radv_compiler_info *compiler_info, const struct radv_shader_debug_info *debug,
const struct radv_shader_info *info);
struct radv_instance;
char *radv_dump_nir_shaders(const struct radv_instance *instance, struct nir_shader *const *shaders, int shader_count);
char *radv_dump_nir_shaders(const struct radv_compiler_info *compiler_info, struct nir_shader *const *shaders, int shader_count);
VkResult radv_shader_wait_for_upload(struct radv_device *device, uint64_t seq);
@ -692,9 +691,9 @@ unsigned radv_compute_spi_ps_input(enum amd_gfx_level gfx_level, const struct ra
bool radv_is_traversal_shader(nir_shader *nir);
bool radv_can_dump_shader(struct radv_device *device, nir_shader *nir);
bool radv_can_dump_shader(const struct radv_compiler_info *compiler_info, nir_shader *nir);
bool radv_can_dump_shader_stats(struct radv_device *device, nir_shader *nir);
bool radv_can_dump_shader_stats(const struct radv_compiler_info *compiler_info, nir_shader *nir);
VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
FILE *output);
@ -751,15 +750,16 @@ get_tcs_input_vertex_stride(unsigned tcs_num_inputs)
return stride;
}
void radv_get_tess_wg_info(const struct radv_physical_device *pdev, const ac_nir_tess_io_info *io_info,
void radv_get_tess_wg_info(const struct radv_compiler_info *compiler_info, const ac_nir_tess_io_info *io_info,
unsigned tcs_vertices_out, unsigned tcs_num_input_vertices, unsigned tcs_num_lds_inputs,
unsigned *num_patches_per_wg, unsigned *lds_size);
void radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
void radv_lower_ngg(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *ngg_stage,
const struct radv_graphics_state_key *gfx_state);
bool radv_consider_culling(const struct radv_physical_device *pdev, struct nir_shader *nir, uint64_t ps_inputs_read,
unsigned num_vertices_per_primitive, const struct radv_shader_info *info);
bool radv_consider_culling(const struct radv_compiler_info *compiler_info, struct nir_shader *nir,
uint64_t ps_inputs_read, unsigned num_vertices_per_primitive,
const struct radv_shader_info *info);
void radv_get_nir_options(struct radv_physical_device *pdev);

View file

@ -10,8 +10,6 @@
*/
#include "radv_shader_args.h"
#include "radv_device.h"
#include "radv_physical_device.h"
#include "radv_shader.h"
#include "util/memstream.h"
@ -322,11 +320,9 @@ declare_ms_input_sgprs(struct radv_shader_args_state *state, const struct radv_s
}
static void
declare_ms_input_vgprs(struct radv_shader_args_state *state, const struct radv_device *device)
declare_ms_input_vgprs(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (pdev->info.mesh_fast_launch_2) {
if (compiler_info->hw.mesh_fast_launch_2) {
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_ids_packed);
} else {
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.vertex_id);
@ -378,13 +374,13 @@ declare_ngg_sgprs(struct radv_shader_args_state *state, const struct radv_shader
}
static void
radv_init_shader_args(struct radv_shader_args_state *state, const struct radv_device *device, mesa_shader_stage stage)
radv_init_shader_args(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state,
mesa_shader_stage stage)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
memset(state->args, 0, sizeof(*state->args));
state->args->explicit_scratch_args = !pdev->use_llvm;
state->args->remap_spi_ps_input = !pdev->use_llvm;
state->args->explicit_scratch_args = !compiler_info->debug.use_llvm;
state->args->remap_spi_ps_input = !compiler_info->debug.use_llvm;
for (int i = 0; i < MAX_SETS; i++)
state->args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
@ -557,12 +553,11 @@ declare_unmerged_vs_tes_gs_args(struct radv_shader_args_state *state, const enum
}
static void
declare_shader_args(struct radv_shader_args_state *state, const struct radv_device *device,
declare_shader_args(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state,
const struct radv_graphics_state_key *gfx_state, const struct radv_shader_info *info,
mesa_shader_stage stage, mesa_shader_stage previous_stage, struct user_sgpr_info *user_sgpr_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
(stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) ||
(stage == MESA_SHADER_MESH && info->ms.has_query) ||
@ -590,7 +585,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
}
}
radv_init_shader_args(state, device, stage);
radv_init_shader_args(compiler_info, state, stage);
if (mesa_shader_stage_is_rt(stage)) {
return;
@ -617,7 +612,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
declare_global_input_sgprs(state, gfx_level, info, user_sgpr_info);
if (info->cs.uses_grid_size) {
if (device->load_grid_size_from_user_sgpr)
if (compiler_info->load_grid_size_from_user_sgpr)
RADV_ADD_UD_ARG(state, 3, AC_ARG_VALUE, ac.num_work_groups, AC_UD_CS_GRID_SIZE);
else
RADV_ADD_UD_ARG(state, 2, AC_ARG_CONST_ADDR, ac.num_work_groups, AC_UD_CS_GRID_SIZE);
@ -660,7 +655,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
RADV_ADD_ARG(state, AC_ARG_SGPR, 1, AC_ARG_VALUE, ac.scratch_offset);
}
if (pdev->info.compiler_info.local_invocation_ids_packed) {
if (compiler_info->ac->local_invocation_ids_packed) {
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_ids_packed);
} else {
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_id_x);
@ -830,11 +825,11 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
declare_ngg_sgprs(state, info, ngg_needs_state_sgpr);
if (pdev->info.gfx_level >= GFX11 && has_shader_query)
if (gfx_level >= GFX11 && has_shader_query)
RADV_ADD_UD_ARG(state, 1, AC_ARG_VALUE, ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA);
}
if (previous_stage != MESA_SHADER_MESH || !pdev->info.mesh_fast_launch_2) {
if (previous_stage != MESA_SHADER_MESH || !compiler_info->hw.mesh_fast_launch_2) {
if (gfx_level >= GFX12) {
RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_vtx_offset, 0);
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_prim_id);
@ -854,7 +849,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
} else if (previous_stage == MESA_SHADER_TESS_EVAL) {
declare_tes_input_vgprs(state);
} else if (previous_stage == MESA_SHADER_MESH) {
declare_ms_input_vgprs(state, device);
declare_ms_input_vgprs(compiler_info, state);
}
} else {
declare_global_input_sgprs(state, gfx_level, info, user_sgpr_info);
@ -964,9 +959,10 @@ radv_gather_shader_args_debug_info(struct radv_shader_args_state *state, struct
}
void
radv_declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
const struct radv_shader_info *info, mesa_shader_stage stage, mesa_shader_stage previous_stage,
struct radv_shader_args *args, struct radv_shader_debug_info *debug)
radv_declare_shader_args(const struct radv_compiler_info *compiler_info,
const struct radv_graphics_state_key *gfx_state, const struct radv_shader_info *info,
mesa_shader_stage stage, mesa_shader_stage previous_stage, struct radv_shader_args *args,
struct radv_shader_debug_info *debug)
{
struct radv_shader_args_state state = {
.args = args,
@ -975,7 +971,7 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
struct user_sgpr_info user_sgpr_info = {0};
if (!mesa_shader_stage_is_rt(stage)) {
declare_shader_args(&state, device, gfx_state, info, stage, previous_stage, NULL);
declare_shader_args(compiler_info, &state, gfx_state, info, stage, previous_stage, NULL);
uint32_t num_user_sgprs = args->num_user_sgprs;
if (info->loads_push_constants)
@ -986,8 +982,7 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
num_user_sgprs++;
}
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
uint32_t available_sgprs =
gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;
@ -1012,13 +1007,13 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
allocate_inline_push_consts(info, &user_sgpr_info);
}
state.gather_debug_info = debug && device->keep_shader_info;
state.gather_debug_info = debug && compiler_info->debug.keep_shader_info;
if (state.gather_debug_info) {
state.ctx = ralloc_context(NULL);
state.gather_debug_info &= !!state.ctx;
}
declare_shader_args(&state, device, gfx_state, info, stage, previous_stage, &user_sgpr_info);
declare_shader_args(compiler_info, &state, gfx_state, info, stage, previous_stage, &user_sgpr_info);
if (state.gather_debug_info)
radv_gather_shader_args_debug_info(&state, debug);
@ -1027,14 +1022,14 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
}
void
radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
radv_declare_ps_epilog_args(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_key *key,
struct radv_shader_args *args)
{
struct radv_shader_args_state state = {
.args = args,
};
radv_init_shader_args(&state, device, MESA_SHADER_FRAGMENT);
radv_init_shader_args(compiler_info, &state, MESA_SHADER_FRAGMENT);
/* Declare VGPR arguments for depth/stencil/sample exports. */
if (key->export_depth)

View file

@ -135,15 +135,15 @@ radv_shader_args_from_ac(struct ac_shader_args *args)
struct radv_graphics_state_key;
struct radv_shader_info;
struct radv_ps_epilog_key;
struct radv_device;
struct radv_shader_debug_info;
struct radv_compiler_info;
void radv_declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
const struct radv_shader_info *info, mesa_shader_stage stage,
mesa_shader_stage previous_stage, struct radv_shader_args *args,
void radv_declare_shader_args(const struct radv_compiler_info *compiler_info,
const struct radv_graphics_state_key *gfx_state, const struct radv_shader_info *info,
mesa_shader_stage stage, mesa_shader_stage previous_stage, struct radv_shader_args *args,
struct radv_shader_debug_info *debug);
void radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
void radv_declare_ps_epilog_args(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_key *key,
struct radv_shader_args *args);
void radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args);

View file

@ -481,13 +481,12 @@ radv_compute_esgs_itemsize(enum amd_gfx_level gfx_level, uint32_t num_varyings)
}
static void
gather_shader_info_ngg_query(struct radv_device *device, struct radv_shader_info *info)
gather_shader_info_ngg_query(const struct radv_compiler_info *compiler_info, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
info->gs.has_pipeline_stat_query = pdev->emulate_ngg_gs_query_pipeline_stat && info->stage == MESA_SHADER_GEOMETRY;
info->gs.has_pipeline_stat_query =
compiler_info->emulate_ngg_gs_query_pipeline_stat && info->stage == MESA_SHADER_GEOMETRY;
info->has_xfb_query = !!info->so.enabled_stream_buffers_mask;
info->has_prim_query = device->cache_key.primitives_generated_query || info->has_xfb_query;
info->has_prim_query = compiler_info->primitives_generated_query || info->has_xfb_query;
}
uint64_t
@ -525,12 +524,10 @@ radv_gather_unlinked_patch_io_mask(const uint64_t nir_io_mask, const uint32_t ni
}
static void
gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
gather_shader_info_vs(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
const struct radv_graphics_state_key *gfx_state, const struct radv_shader_stage_key *stage_key,
struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
if (radv_use_vs_prolog(nir, gfx_state)) {
info->vs.has_prolog = true;
info->vs.dynamic_inputs = true;
@ -567,19 +564,18 @@ gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
info->vs.as_ls = true;
} else if (info->next_stage == MESA_SHADER_GEOMETRY) {
info->vs.as_es = true;
info->esgs_itemsize = radv_compute_esgs_itemsize(pdev->info.gfx_level, info->vs.num_linked_outputs);
info->esgs_itemsize = radv_compute_esgs_itemsize(compiler_info->ac->gfx_level, info->vs.num_linked_outputs);
}
if (info->is_ngg && (info->next_stage == MESA_SHADER_FRAGMENT || info->next_stage == MESA_SHADER_NONE)) {
gather_shader_info_ngg_query(device, info);
gather_shader_info_ngg_query(compiler_info, info);
}
}
static void
gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
gather_shader_info_tcs(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
nir_tcs_info tcs_info;
@ -597,7 +593,7 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
info->tcs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
if (gfx_state->ts.patch_control_points) {
radv_get_tess_wg_info(pdev, &info->tcs.io_info, nir->info.tess.tcs_vertices_out,
radv_get_tess_wg_info(compiler_info, &info->tcs.io_info, nir->info.tess.tcs_vertices_out,
gfx_state->ts.patch_control_points,
/* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */
info->tcs.num_linked_inputs, &info->num_tess_patches, &info->tcs.lds_size);
@ -605,10 +601,9 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
}
static void
gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
gather_shader_info_tes(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
info->gs_inputs_read = ~0ULL;
info->tes._primitive_mode = nir->info.tess._primitive_mode;
info->tes.spacing = nir->info.tess.spacing;
@ -629,11 +624,11 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct
if (info->next_stage == MESA_SHADER_GEOMETRY) {
info->tes.as_es = true;
info->esgs_itemsize = radv_compute_esgs_itemsize(pdev->info.gfx_level, info->tes.num_linked_outputs);
info->esgs_itemsize = radv_compute_esgs_itemsize(compiler_info->ac->gfx_level, info->tes.num_linked_outputs);
}
if (info->is_ngg && (info->next_stage == MESA_SHADER_FRAGMENT || info->next_stage == MESA_SHADER_NONE)) {
gather_shader_info_ngg_query(device, info);
gather_shader_info_ngg_query(compiler_info, info);
}
}
@ -678,10 +673,9 @@ radv_get_esgs_gsvs_ring_size(const struct radv_device *device, struct radv_shade
}
void
radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info,
radv_get_legacy_gs_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
struct radv_shader_info *gs_info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_legacy_gs_info *out = &gs_info->legacy_gs_info;
const unsigned esgs_vertex_stride = es_info ? es_info->esgs_itemsize : out->esgs_itemsize;
ac_legacy_gs_subgroup_info info;
@ -689,7 +683,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
ac_legacy_gs_compute_subgroup_info(gs_info->gs.input_prim, gs_info->gs.vertices_out, gs_info->gs.invocations,
esgs_vertex_stride, &info);
const uint32_t lds_granularity = ac_shader_get_lds_alloc_granularity(pdev->info.gfx_level);
const uint32_t lds_granularity = ac_shader_get_lds_alloc_granularity(compiler_info->ac->gfx_level);
const uint32_t total_lds_bytes = align(info.esgs_lds_size * 4, lds_granularity);
out->gs_inst_prims_in_subgroup = info.gs_inst_prims_in_subgroup;
@ -699,10 +693,9 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
}
static void
gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
gather_shader_info_gs(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
info->gs.vertices_in = nir->info.gs.vertices_in;
info->gs.vertices_out = nir->info.gs.vertices_out;
info->gs.input_prim = nir->info.gs.input_primitive;
@ -713,13 +706,14 @@ gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct
info->gs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
if (info->is_ngg)
gather_shader_info_ngg_query(device, info);
gather_shader_info_ngg_query(compiler_info, info);
else
info->legacy_gs_info.esgs_itemsize = radv_compute_esgs_itemsize(pdev->info.gfx_level, info->gs.num_linked_inputs);
info->legacy_gs_info.esgs_itemsize =
radv_compute_esgs_itemsize(compiler_info->ac->gfx_level, info->gs.num_linked_inputs);
}
static void
gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir,
gather_shader_info_mesh(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
const struct radv_shader_stage_key *stage_key, struct radv_shader_info *info)
{
struct gfx10_ngg_info *ngg_info = &info->ngg_info;
@ -760,17 +754,17 @@ gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir,
ngg_info->prim_amp_factor = nir->info.mesh.max_primitives_out;
ngg_info->vgt_esgs_ring_itemsize = 1;
info->ms.has_query = device->cache_key.mesh_shader_queries;
info->ms.has_query = compiler_info->mesh_shader_queries;
info->ms.has_task = stage_key->has_task_shader;
}
static void
calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
calc_mesh_workgroup_size(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
if (pdev->info.mesh_fast_launch_2) {
if (compiler_info->hw.mesh_fast_launch_2) {
/* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */
info->workgroup_size = api_workgroup_size;
} else {
@ -884,7 +878,7 @@ gather_shader_info_rt(const nir_shader *nir, struct radv_shader_info *info)
}
static void
gather_shader_info_task(struct radv_device *device, const nir_shader *nir,
gather_shader_info_task(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
const struct radv_shader_stage_key *stage_key, struct radv_shader_info *info)
{
/* Task shaders always need these for the I/O lowering even if the API shader doesn't actually
@ -906,7 +900,7 @@ gather_shader_info_task(struct radv_device *device, const nir_shader *nir,
info->cs.linear_taskmesh_dispatch =
nir->info.mesh.ts_mesh_dispatch_dimensions[1] == 1 && nir->info.mesh.ts_mesh_dispatch_dimensions[2] == 1;
info->cs.has_query = device->cache_key.mesh_shader_queries;
info->cs.has_query = compiler_info->mesh_shader_queries;
}
static uint32_t
@ -1000,12 +994,11 @@ radv_nir_shader_info_init(mesa_shader_stage stage, mesa_shader_stage next_stage,
}
void
radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
radv_nir_shader_info_pass(const struct radv_compiler_info *compiler_info, const struct nir_shader *nir,
const struct radv_shader_layout *layout, const struct radv_shader_stage_key *stage_key,
const struct radv_graphics_state_key *gfx_state, const enum radv_pipeline_type pipeline_type,
bool consider_force_vrs, struct radv_shader_info *info)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
if (layout->use_dynamic_descriptors) {
@ -1060,11 +1053,11 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
/* Used by compute and mesh shaders. Mesh shaders must always declare this before GFX11. */
info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS) ||
(nir->info.stage == MESA_SHADER_MESH && pdev->info.gfx_level < GFX11);
(nir->info.stage == MESA_SHADER_MESH && compiler_info->ac->gfx_level < GFX11);
info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |
radv_shader_should_clear_lds(device, nir);
radv_shader_should_clear_lds(compiler_info, nir);
info->cs.derivative_group = nir->info.derivative_group;
if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
@ -1073,8 +1066,9 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
info->cs.block_size[i] = nir->info.workgroup_size[i];
}
info->user_data_0 = radv_get_user_data_0(pdev->info.gfx_level, info);
info->merged_shader_compiled_separately = radv_is_merged_shader_compiled_separately(pdev->info.gfx_level, info);
info->user_data_0 = radv_get_user_data_0(compiler_info->ac->gfx_level, info);
info->merged_shader_compiled_separately =
radv_is_merged_shader_compiled_separately(compiler_info->ac->gfx_level, info);
info->force_indirect_descriptors = info->merged_shader_compiled_separately || stage_key->indirect_bindable;
info->descriptor_heap = stage_key->descriptor_heap;
@ -1082,25 +1076,25 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
case MESA_SHADER_COMPUTE:
break;
case MESA_SHADER_TASK:
gather_shader_info_task(device, nir, stage_key, info);
gather_shader_info_task(compiler_info, nir, stage_key, info);
break;
case MESA_SHADER_FRAGMENT:
gather_shader_info_fs(pdev->info.gfx_level, nir, gfx_state, info);
gather_shader_info_fs(compiler_info->ac->gfx_level, nir, gfx_state, info);
break;
case MESA_SHADER_GEOMETRY:
gather_shader_info_gs(device, nir, info);
gather_shader_info_gs(compiler_info, nir, info);
break;
case MESA_SHADER_TESS_EVAL:
gather_shader_info_tes(device, nir, info);
gather_shader_info_tes(compiler_info, nir, info);
break;
case MESA_SHADER_TESS_CTRL:
gather_shader_info_tcs(device, nir, gfx_state, info);
gather_shader_info_tcs(compiler_info, nir, gfx_state, info);
break;
case MESA_SHADER_VERTEX:
gather_shader_info_vs(device, nir, gfx_state, stage_key, info);
gather_shader_info_vs(compiler_info, nir, gfx_state, stage_key, info);
break;
case MESA_SHADER_MESH:
gather_shader_info_mesh(device, nir, stage_key, info);
gather_shader_info_mesh(compiler_info, nir, stage_key, info);
break;
default:
if (mesa_shader_stage_is_rt(nir->info.stage))
@ -1111,7 +1105,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
info->wave_size = nir->info.min_subgroup_size;
assert(info->wave_size == nir->info.max_subgroup_size);
assert(info->wave_size == 32 || info->wave_size == 64);
assert(pdev->info.gfx_level >= GFX10 || info->wave_size == 64);
assert(compiler_info->ac->gfx_level >= GFX10 || info->wave_size == 64);
assert(nir->info.stage != MESA_SHADER_GEOMETRY || info->is_ngg || info->wave_size == 64);
switch (nir->info.stage) {
@ -1140,7 +1134,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
case MESA_SHADER_TESS_CTRL:
if (gfx_state->ts.patch_control_points) {
info->workgroup_size =
ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
ac_compute_lshs_workgroup_size(compiler_info->ac->gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out);
} else {
/* Set the maximum possible value when the workgroup size can't be determined. */
@ -1168,7 +1162,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
* Set the maximum possible value by default, this will be optimized during linking if
* possible.
*/
if (pdev->info.gfx_level <= GFX8)
if (compiler_info->ac->gfx_level <= GFX8)
info->workgroup_size = info->wave_size;
else
info->workgroup_size = 256;
@ -1181,7 +1175,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
}
break;
case MESA_SHADER_MESH:
calc_mesh_workgroup_size(device, nir, info);
calc_mesh_workgroup_size(compiler_info, nir, info);
break;
default:
/* FS always operates without workgroups. Other stages are computed during linking but assume
@ -1255,11 +1249,10 @@ gfx10_get_ngg_vert_prim_lds_size(const struct radv_shader_info *es_info, const s
}
void
gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
struct gfx10_ngg_info *out)
gfx10_get_ngg_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
struct radv_shader_info *gs_info, struct gfx10_ngg_info *out)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
const enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
const struct radv_shader_info *stage_info = gs_info ? gs_info : es_info;
const unsigned gs_num_invocations = gs_info ? MAX2(gs_info->gs.invocations, 1) : 1;
const unsigned input_prim = radv_get_pre_rast_input_topology(es_info, gs_info);
@ -1301,10 +1294,9 @@ gfx10_ngg_set_esgs_ring_itemsize(struct radv_shader_info *es_info, struct radv_s
}
static void
radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage *ngg_stage,
radv_determine_ngg_settings(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *ngg_stage,
struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
uint64_t ps_inputs_read;
assert(ngg_stage->info.is_ngg);
@ -1337,11 +1329,12 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage
}
ngg_stage->info.has_ngg_culling =
radv_consider_culling(pdev, ngg_stage->nir, ps_inputs_read, num_vertices_per_prim, &ngg_stage->info);
radv_consider_culling(compiler_info, ngg_stage->nir, ps_inputs_read, num_vertices_per_prim, &ngg_stage->info);
if (ngg_stage->stage != MESA_SHADER_GEOMETRY) {
nir_function_impl *impl = nir_shader_get_entrypoint(ngg_stage->nir);
ngg_stage->info.has_ngg_early_prim_export = pdev->info.gfx_level < GFX11 && exec_list_is_singular(&impl->body);
ngg_stage->info.has_ngg_early_prim_export =
compiler_info->ac->gfx_level < GFX11 && exec_list_is_singular(&impl->body);
/* NGG passthrough mode should be disabled when culling and when the vertex shader
* exports the primitive ID.
@ -1353,10 +1346,9 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage
}
static void
radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *stages,
radv_link_shaders_info(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stages,
const struct radv_graphics_state_key *gfx_state)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
struct radv_shader_stage *vs_stage = stages[MESA_SHADER_VERTEX].nir ? &stages[MESA_SHADER_VERTEX] : NULL;
struct radv_shader_stage *tcs_stage = stages[MESA_SHADER_TESS_CTRL].nir ? &stages[MESA_SHADER_TESS_CTRL] : NULL;
struct radv_shader_stage *tes_stage = stages[MESA_SHADER_TESS_EVAL].nir ? &stages[MESA_SHADER_TESS_EVAL] : NULL;
@ -1382,15 +1374,15 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *sta
const bool ps_prim_id_in = !fs_stage || fs_stage->info.ps.prim_id_input;
const bool ps_clip_dists_in = !fs_stage || !!fs_stage->info.ps.input_clips_culls_mask;
radv_set_vs_output_param(pdev->info.gfx_level, prerast_stage->nir, gfx_state, &prerast_stage->info, ps_prim_id_in,
ps_clip_dists_in);
radv_set_vs_output_param(compiler_info->ac->gfx_level, prerast_stage->nir, gfx_state, &prerast_stage->info,
ps_prim_id_in, ps_clip_dists_in);
}
if (prerast_stage && !ms_stage) {
/* Compute NGG info (GFX10+) or GS info. */
if (ngg_stage) {
/* Determine other NGG settings like culling. */
radv_determine_ngg_settings(device, ngg_stage, fs_stage, gfx_state);
radv_determine_ngg_settings(compiler_info, ngg_stage, fs_stage, gfx_state);
if (es_stage) {
gfx10_ngg_set_esgs_ring_itemsize(&es_stage->info, gs_stage ? &gs_stage->info : NULL,
@ -1412,11 +1404,11 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *sta
vs_stage->info.vs.tcs_inputs_via_lds = tcs_stage->nir->info.inputs_read;
if (gfx_state->ts.patch_control_points) {
vs_stage->info.workgroup_size =
ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
vs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size(
compiler_info->ac->gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
if (!pdev->use_llvm) {
if (!compiler_info->debug.use_llvm) {
/* When the number of TCS input and output vertices are the same (typically 3):
* - There is an equal amount of LS and HS invocations
* - In case of merged LSHS shaders, the LS and HS halves of the shader always process
@ -1427,7 +1419,7 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *sta
* instruction dominating another with a different mode.
*/
vs_stage->info.vs.tcs_in_out_eq =
pdev->info.gfx_level >= GFX9 &&
compiler_info->ac->gfx_level >= GFX9 &&
gfx_state->ts.patch_control_points == tcs_stage->info.tcs.tcs_vertices_out &&
vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode;
@ -1493,14 +1485,12 @@ radv_nir_shader_info_merge(const struct radv_shader_stage *src, struct radv_shad
}
void
radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
struct radv_shader_stage *stages)
radv_nir_shader_info_link(const struct radv_compiler_info *compiler_info,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages)
{
const struct radv_physical_device *pdev = radv_device_physical(device);
radv_link_shaders_info(compiler_info, stages, gfx_state);
radv_link_shaders_info(device, stages, gfx_state);
if (pdev->info.gfx_level >= GFX9) {
if (compiler_info->ac->gfx_level >= GFX9) {
/* Merge shader info for VS+TCS. */
if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_TESS_CTRL].nir) {
radv_nir_shader_info_merge(&stages[MESA_SHADER_VERTEX], &stages[MESA_SHADER_TESS_CTRL]);

View file

@ -327,7 +327,7 @@ struct radv_shader_regs {
void radv_nir_shader_info_init(mesa_shader_stage stage, mesa_shader_stage next_stage, struct radv_shader_info *info);
void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
void radv_nir_shader_info_pass(const struct radv_compiler_info *compiler_info, const struct nir_shader *nir,
const struct radv_shader_layout *layout, const struct radv_shader_stage_key *stage_key,
const struct radv_graphics_state_key *gfx_state,
const enum radv_pipeline_type pipeline_type, bool consider_force_vrs,
@ -335,16 +335,17 @@ void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shad
void radv_get_esgs_gsvs_ring_size(const struct radv_device *device, struct radv_shader_regs *regs,
const struct radv_shader_info *es_info, const struct radv_shader_info *gs_info);
void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info);
void radv_get_legacy_gs_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
struct radv_shader_info *gs_info);
void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info,
void gfx10_get_ngg_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
struct radv_shader_info *gs_info, struct gfx10_ngg_info *out);
void gfx10_ngg_set_esgs_ring_itemsize(struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
struct gfx10_ngg_info *out);
void radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
struct radv_shader_stage *stages);
void radv_nir_shader_info_link(const struct radv_compiler_info *compiler_info,
const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages);
enum ac_hw_stage radv_select_hw_stage(const struct radv_shader_info *const info, const enum amd_gfx_level gfx_level);

View file

@ -170,7 +170,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r
struct radv_shader_debug_info debug[MESA_VULKAN_SHADER_STAGES] = {0};
struct radv_shader_debug_info gs_copy_debug = {0};
radv_graphics_shaders_compile(device, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
radv_graphics_shaders_compile(&device->compiler_info, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
&gs_copy_debug, &shader_obj->gs.copy_binary);
radv_graphics_shaders_create(device, NULL, true, shaders, binaries, debug, &shader_obj->gs.copy_shader,
shader_obj->gs.copy_binary, &gs_copy_debug);
@ -202,7 +202,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r
radv_shader_stage_init(pCreateInfo, &stages[stage]);
stages[stage].next_stage = next_stage;
radv_graphics_shaders_compile(device, NULL, stages, &gfx_state, false, false, false, NULL, false, debug,
radv_graphics_shaders_compile(&device->compiler_info, NULL, stages, &gfx_state, false, false, false, NULL, false, debug,
binaries, &gs_copy_debug, &shader_obj->gs.copy_binary);
radv_graphics_shaders_create(device, NULL, true, shaders, binaries, debug, &shader_obj->gs.copy_shader,
shader_obj->gs.copy_binary, &gs_copy_debug);
@ -251,7 +251,7 @@ radv_shader_object_init_compute(struct radv_shader_object *shader_obj, struct ra
radv_shader_stage_init(pCreateInfo, &stage);
struct radv_shader_debug_info cs_dbg = {0};
struct radv_shader_binary *cs_binary = radv_compile_cs(device, &stage, false, false, false, &cs_dbg);
struct radv_shader_binary *cs_binary = radv_compile_cs(&device->compiler_info, &stage, false, false, false, &cs_dbg);
struct radv_shader *cs_shader = radv_shader_create(device, NULL, cs_binary, true, &cs_dbg);
ralloc_free(stage.nir);
@ -474,7 +474,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con
struct radv_shader_binary *gs_copy_binary = NULL;
struct radv_shader_debug_info gs_copy_debug = {0};
radv_graphics_shaders_compile(device, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
radv_graphics_shaders_compile(&device->compiler_info, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
&gs_copy_debug, &gs_copy_binary);
radv_graphics_shaders_create(device, NULL, true, shaders, binaries, debug, &gs_copy_shader, gs_copy_binary,
&gs_copy_debug);