diff --git a/src/amd/vulkan/nir/radv_nir.h b/src/amd/vulkan/nir/radv_nir.h
index 98f412da14a..b54e45d7eec 100644
--- a/src/amd/vulkan/nir/radv_nir.h
+++ b/src/amd/vulkan/nir/radv_nir.h
@@ -17,17 +17,17 @@ extern "C" {
 #endif
 
 typedef struct nir_shader nir_shader;
-struct radeon_info;
 struct radv_shader_stage;
 struct radv_shader_info;
 struct radv_shader_args;
 struct radv_shader_layout;
-struct radv_device;
 struct radv_graphics_state_key;
 struct radv_ps_epilog_key;
 struct radv_debug_nir;
+struct radv_compiler_info;
 
-bool radv_nir_lower_descriptors(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage);
+bool radv_nir_lower_descriptors(nir_shader *shader, const struct radv_compiler_info *compiler_info,
+                                const struct radv_shader_stage *stage);
 
 bool radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_stage *stage,
                         const struct radv_graphics_state_key *gfx_state, uint32_t address32_hi);
@@ -36,10 +36,11 @@ bool radv_nir_lower_hit_attrib_derefs(nir_shader *shader);
 
 bool radv_nir_lower_ray_payload_derefs(nir_shader *shader, uint32_t offset);
 
-bool radv_nir_lower_ray_queries(nir_shader *shader, struct radv_device *device);
+bool radv_nir_lower_ray_queries(nir_shader *shader, const struct radv_compiler_info *compiler_info);
 
-bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_stage,
-                              const struct radv_graphics_state_key *gfx_state, const struct radeon_info *gpu_info);
+bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_compiler_info *compiler_info,
+                              const struct radv_shader_stage *vs_stage,
+                              const struct radv_graphics_state_key *gfx_state);
 
 bool radv_nir_optimize_vs_inputs_to_const(nir_shader *shader, const struct radv_graphics_state_key *gfx_state);
@@ -63,7 +64,7 @@ unsigned radv_map_io_driver_location(unsigned semantic);
 
 void radv_nir_lower_io(nir_shader *nir);
 
-bool radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage);
+bool radv_nir_lower_io_to_mem(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage);
 
 bool radv_nir_lower_cooperative_matrix(nir_shader *shader, enum amd_gfx_level gfx_level, unsigned wave_size);
@@ -95,7 +96,7 @@ bool radv_nir_opt_tid_function(nir_shader *shader, const radv_nir_opt_tid_functi
 bool radv_nir_opt_fs_builtins(nir_shader *shader, const struct radv_graphics_state_key *gfx_state,
                               unsigned vgt_outprim_type);
 
-bool radv_nir_lower_immediate_samplers(nir_shader *shader, struct radv_device *device,
+bool radv_nir_lower_immediate_samplers(nir_shader *shader, const struct radv_compiler_info *compiler_info,
                                        const struct radv_shader_stage *stage);
 
 void radv_nir_lower_callee_signature(nir_function *function);
diff --git a/src/amd/vulkan/nir/radv_nir_lower_descriptors.c b/src/amd/vulkan/nir/radv_nir_lower_descriptors.c
index a80e85c6dae..a55a335cac7 100644
--- a/src/amd/vulkan/nir/radv_nir_lower_descriptors.c
+++ b/src/amd/vulkan/nir/radv_nir_lower_descriptors.c
@@ -10,9 +10,7 @@
 #include "nir_builder.h"
 #include "radv_descriptor_set.h"
 #include "radv_descriptors.h"
-#include "radv_device.h"
 #include "radv_nir.h"
-#include "radv_physical_device.h"
 #include "radv_shader.h"
 #include "radv_shader_args.h"
 #include "sid.h"
@@ -676,21 +674,22 @@ lower_descriptors_tex(nir_builder *b, lower_descriptors_state *state, nir_tex_in
 }
 
 bool
-radv_nir_lower_descriptors(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
+radv_nir_lower_descriptors(nir_shader *shader, const struct radv_compiler_info *compiler_info,
+                           const struct radv_shader_stage *stage)
 {
    bool progress = false;
-   const struct radv_physical_device *pdev = radv_device_physical(device);
 
    lower_descriptors_state state = {
-      .gfx_level = pdev->info.gfx_level,
-      .address32_hi = pdev->info.address32_hi,
-      .sampled_image_desc_size = radv_get_sampled_image_desc_size(pdev),
-      .combined_image_sampler_desc_size = radv_get_combined_image_sampler_desc_size(pdev),
-      .combined_image_sampler_offset = radv_get_combined_image_sampler_offset(pdev),
-      .disable_aniso_single_level = pdev->cache_key.disable_aniso_single_level,
-      .has_image_load_dcc_bug = pdev->info.compiler_info.has_image_load_dcc_bug,
+      .gfx_level = compiler_info->ac->gfx_level,
+      .address32_hi = compiler_info->hw.address32_hi,
+      .sampled_image_desc_size = compiler_info->sampled_image_desc_size,
+      .combined_image_sampler_desc_size = compiler_info->combined_image_sampler_desc_size,
+      .combined_image_sampler_offset = compiler_info->combined_image_sampler_offset,
+      .disable_aniso_single_level =
+         compiler_info->cache_key->disable_aniso_single_level && compiler_info->ac->gfx_level < GFX8,
+      .has_image_load_dcc_bug = compiler_info->ac->has_image_load_dcc_bug,
       .disable_tg4_trunc_coord =
-         !pdev->info.compiler_info.conformant_trunc_coord && !pdev->cache_key.disable_trunc_coord,
+         !compiler_info->ac->conformant_trunc_coord && !compiler_info->cache_key->disable_trunc_coord,
       .args = &stage->args,
       .info = &stage->info,
       .layout = &stage->layout,
diff --git a/src/amd/vulkan/nir/radv_nir_lower_immediate_samplers.c b/src/amd/vulkan/nir/radv_nir_lower_immediate_samplers.c
index f485922e4ce..8676e035786 100644
--- a/src/amd/vulkan/nir/radv_nir_lower_immediate_samplers.c
+++ b/src/amd/vulkan/nir/radv_nir_lower_immediate_samplers.c
@@ -7,7 +7,6 @@
 #include "nir.h"
 #include "nir_builder.h"
 #include "radv_descriptor_set.h"
-#include "radv_device.h"
 #include "radv_nir.h"
 #include "radv_physical_device.h"
 #include "radv_sampler.h"
@@ -19,8 +18,7 @@
  * forbidden).
  */
 typedef struct {
-   const struct radv_device *device;
-   bool disable_tg4_trunc_coord;
+   const struct radv_compiler_info *compiler_info;
    const struct radv_shader_layout *layout;
 } lower_immediate_samplers_state;
 
@@ -31,8 +29,10 @@ lower_immediate_samplers(nir_builder *b, nir_tex_instr *tex, void *cb_data)
 
    b->cursor = nir_before_instr(&tex->instr);
 
+   const bool disable_tg4_trunc_coord =
+      !state->compiler_info->ac->conformant_trunc_coord && !state->compiler_info->cache_key->disable_trunc_coord;
    const uint32_t dword0_mask =
-      tex->op == nir_texop_tg4 && state->disable_tg4_trunc_coord ? C_008F30_TRUNC_COORD : 0xffffffffu;
+      tex->op == nir_texop_tg4 && disable_tg4_trunc_coord ? C_008F30_TRUNC_COORD : 0xffffffffu;
 
    if (tex->embedded_sampler) {
       const struct vk_sampler_state_array *embedded_samplers = &state->layout->embedded_samplers;
@@ -42,7 +42,7 @@ lower_immediate_samplers(nir_builder *b, nir_tex_instr *tex, void *cb_data)
       assert(sampler_idx < embedded_samplers->sampler_count);
       const struct vk_sampler_state *sampler_state = &embedded_samplers->samplers[sampler_idx];
 
-      radv_make_sampler_descriptor(state->device, sampler_state, desc);
+      radv_make_sampler_descriptor(state->compiler_info, sampler_state, desc);
 
       nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle,
                             nir_imm_ivec4(b, desc[0] & dword0_mask, desc[1], desc[2], desc[3]));
@@ -89,14 +89,11 @@ lower_immediate_samplers(nir_builder *b, nir_tex_instr *tex, void *cb_data)
 }
 
 bool
-radv_nir_lower_immediate_samplers(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
+radv_nir_lower_immediate_samplers(nir_shader *shader, const struct radv_compiler_info *compiler_info,
+                                  const struct radv_shader_stage *stage)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
    lower_immediate_samplers_state state = {
-      .device = device,
-      .disable_tg4_trunc_coord =
-         !pdev->info.compiler_info.conformant_trunc_coord && !pdev->cache_key.disable_trunc_coord,
+      .compiler_info = compiler_info,
      .layout = &stage->layout,
    };
diff --git a/src/amd/vulkan/nir/radv_nir_lower_io.c b/src/amd/vulkan/nir/radv_nir_lower_io.c
index a5b80bc4e2d..b96d9134660 100644
--- a/src/amd/vulkan/nir/radv_nir_lower_io.c
+++ b/src/amd/vulkan/nir/radv_nir_lower_io.c
@@ -10,9 +10,7 @@
 #include "nir.h"
 #include "nir_builder.h"
 #include "nir_tcs_info.h"
-#include "radv_device.h"
 #include "radv_nir.h"
-#include "radv_physical_device.h"
 #include "radv_shader.h"
 
 static int
@@ -85,9 +83,8 @@ radv_map_io_driver_location(unsigned semantic)
 }
 
 bool
-radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage)
+radv_nir_lower_io_to_mem(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
    const struct radv_shader_info *info = &stage->info;
    ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location;
    ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
@@ -95,16 +92,16 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
 
    if (nir->info.stage == MESA_SHADER_VERTEX) {
       if (info->vs.as_ls) {
-         NIR_PASS(_, nir, ac_nir_lower_ls_outputs_to_mem, map_output, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
-                  info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
+         NIR_PASS(_, nir, ac_nir_lower_ls_outputs_to_mem, map_output, compiler_info->ac->gfx_level,
+                  info->vs.tcs_in_out_eq, info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
         return true;
      } else if (info->vs.as_es) {
-         NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
+         NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, compiler_info->ac->gfx_level, info->esgs_itemsize,
                   info->gs_inputs_read);
         return true;
      }
   } else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
-      NIR_PASS(_, nir, ac_nir_lower_hs_inputs_to_mem, map_input, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
+      NIR_PASS(_, nir, ac_nir_lower_hs_inputs_to_mem, map_input, compiler_info->ac->gfx_level, info->vs.tcs_in_out_eq,
               info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
 
      nir_tcs_info tcs_info;
@@ -113,21 +110,21 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
      ac_nir_get_tess_io_info(nir, &tcs_info, info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, map_output,
                              true, &tess_io_info);
 
-      NIR_PASS(_, nir, ac_nir_lower_hs_outputs_to_mem, &tcs_info, &tess_io_info, map_output, pdev->info.gfx_level,
-               info->wave_size);
+      NIR_PASS(_, nir, ac_nir_lower_hs_outputs_to_mem, &tcs_info, &tess_io_info, map_output,
+               compiler_info->ac->gfx_level, info->wave_size);
 
      return true;
   } else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
      NIR_PASS(_, nir, ac_nir_lower_tes_inputs_to_mem, map_input);
 
      if (info->tes.as_es) {
-         NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
+         NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, compiler_info->ac->gfx_level, info->esgs_itemsize,
                   info->gs_inputs_read);
      }
 
      return true;
   } else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
-      NIR_PASS(_, nir, ac_nir_lower_gs_inputs_to_mem, map_input, pdev->info.gfx_level, false);
+      NIR_PASS(_, nir, ac_nir_lower_gs_inputs_to_mem, map_input, compiler_info->ac->gfx_level, false);
      return true;
   } else if (nir->info.stage == MESA_SHADER_TASK) {
      ac_nir_lower_task_outputs_to_mem(nir, info->cs.has_query);
diff --git a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c
index 810f62f06f6..e77d3adacc0 100644
--- a/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c
+++ b/src/amd/vulkan/nir/radv_nir_lower_ray_queries.c
@@ -154,7 +154,7 @@ struct ray_query_vars {
 
 static void
 init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray_query_vars *dst, const char *base_name,
-                    const struct radv_physical_device *pdev)
+                    const struct radv_compiler_info *compiler_info)
 {
    memset(dst, 0, sizeof(*dst));
 
@@ -162,23 +162,23 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
       shader->info.workgroup_size[0] * shader->info.workgroup_size[1] * shader->info.workgroup_size[2];
    uint32_t shared_stack_entries = shader->info.ray_queries == 1 ? 16 : 8;
    /* ds_bvh_stack* instructions use a fixed stride of 32 dwords. */
-   if (radv_use_bvh_stack_rtn(pdev))
+   if (radv_use_bvh_stack_rtn(compiler_info))
      workgroup_size = align(workgroup_size, 32);
    uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4;
    uint32_t shared_offset = align(shader->info.shared_size, 4);
 
    if (shader->info.stage != MESA_SHADER_COMPUTE || glsl_type_is_array(opaque_type) ||
-       shared_offset + shared_stack_size > pdev->info.lds_size_per_workgroup) {
+       shared_offset + shared_stack_size > compiler_info->hw.lds_size_per_workgroup) {
      dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT;
   } else {
-      if (radv_use_bvh_stack_rtn(pdev)) {
+      if (radv_use_bvh_stack_rtn(compiler_info)) {
         /* The hardware ds_bvh_stack_rtn address can only encode a stack base up to 8191 dwords, or 16383 dwords on
          * gfx12+.
          */
         uint32_t num_wave32_groups = workgroup_size / 32;
         uint32_t max_group_stack_base = (num_wave32_groups - 1) * 32 * shared_stack_entries;
         uint32_t max_stack_base = (shared_offset / 4) + max_group_stack_base;
-         uint32_t max_hw_stack_base = pdev->info.gfx_level >= GFX12 ? 16384 : 8192;
+         uint32_t max_hw_stack_base = compiler_info->ac->gfx_level >= GFX12 ? 16384 : 8192;
         dst->use_bvh_stack_rtn = max_stack_base < max_hw_stack_base;
      }
      dst->shared_stack = true;
@@ -194,11 +194,11 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
 
 static void
 lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *ht,
-                const struct radv_physical_device *pdev)
+                const struct radv_compiler_info *compiler_info)
 {
    struct ray_query_vars *vars = ralloc(ht, struct ray_query_vars);
 
-   init_ray_query_vars(shader, ray_query->type, vars, ray_query->name == NULL ? "" : ray_query->name, pdev);
+   init_ray_query_vars(shader, ray_query->type, vars, ray_query->name == NULL ? "" : ray_query->name, compiler_info);
 
    _mesa_hash_table_insert(ht, ray_query, vars);
 }
@@ -266,10 +266,8 @@ enum rq_intersection_type { intersection_type_none, intersection_type_triangle,
 
 static void
 lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_vars *vars, nir_deref_instr *rq,
-                    struct radv_device *device)
+                    const struct radv_compiler_info *compiler_info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
    nir_deref_instr *closest = rq_deref(b, rq, closest);
    nir_deref_instr *candidate = rq_deref(b, rq, candidate);
 
@@ -307,7 +305,7 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
    bvh_offset = nir_if_phi(b, bvh_offset, zero);
 
    nir_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
-   bvh_base = build_addr_to_node(device, b, bvh_base, instr->src[2].ssa);
+   bvh_base = build_addr_to_node(compiler_info, b, bvh_base, instr->src[2].ssa);
 
    rq_store(b, rq, root_bvh_base, bvh_base);
    rq_store(b, rq, trav_bvh_base, bvh_base);
@@ -317,8 +315,8 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
    if (vars->use_bvh_stack_rtn) {
      uint32_t workgroup_size =
         b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2];
-      nir_def *addr =
-         radv_build_bvh_stack_rtn_addr(b, stack_idx, pdev, workgroup_size, vars->shared_base, vars->stack_entries);
+      nir_def *addr = radv_build_bvh_stack_rtn_addr(b, stack_idx, compiler_info, workgroup_size, vars->shared_base,
+                                                    vars->stack_entries);
      rq_store(b, rq, trav_stack, addr);
      rq_store(b, rq, trav_stack_low_watermark, addr);
   } else {
@@ -340,13 +338,14 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
    rq_store(b, rq, trav_top_stack, nir_imm_int(b, -1));
 
-   rq_store(b, rq, incomplete, nir_iand_imm(b, accel_struct_non_null, !pdev->cache_key.no_rt));
+   rq_store(b, rq, incomplete, nir_iand_imm(b, accel_struct_non_null, !compiler_info->cache_key->no_rt));
 
    vars->initialize = instr;
 }
 
 static nir_def *
-lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *instr, nir_deref_instr *rq)
+lower_rq_load(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_intrinsic_instr *instr,
+              nir_deref_instr *rq)
 {
    bool committed = nir_intrinsic_committed(instr);
 
@@ -369,24 +368,24 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
    case nir_ray_query_value_intersection_geometry_index:
      return nir_iand_imm(b, isec_load(b, intersection, geometry_id_and_flags), 0xFFFFFF);
    case nir_ray_query_value_intersection_instance_custom_index:
-      return radv_load_custom_instance(device, b, isec_load(b, intersection, instance_addr));
+      return radv_load_custom_instance(compiler_info, b, isec_load(b, intersection, instance_addr));
    case nir_ray_query_value_intersection_instance_id:
-      return radv_load_instance_id(device, b, isec_load(b, intersection, instance_addr));
+      return radv_load_instance_id(compiler_info, b, isec_load(b, intersection, instance_addr));
    case nir_ray_query_value_intersection_instance_sbt_index:
      return nir_iand_imm(b, isec_load(b, intersection, sbt_offset_and_flags), 0xFFFFFF);
    case nir_ray_query_value_intersection_object_ray_direction: {
      nir_def *wto_matrix[3];
-      radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
+      radv_load_wto_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), wto_matrix);
      return nir_build_vec3_mat_mult(b, rq_load(b, rq, direction), wto_matrix, false);
    }
    case nir_ray_query_value_intersection_object_ray_origin: {
      nir_def *wto_matrix[3];
-      radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
+      radv_load_wto_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), wto_matrix);
      return nir_build_vec3_mat_mult(b, rq_load(b, rq, origin), wto_matrix, true);
    }
    case nir_ray_query_value_intersection_object_to_world: {
      nir_def *otw_matrix[3];
-      radv_load_otw_matrix(device, b, isec_load(b, intersection, instance_addr), otw_matrix);
+      radv_load_otw_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), otw_matrix);
      return nir_vec3(b, nir_channel(b, otw_matrix[0], column), nir_channel(b, otw_matrix[1], column),
                      nir_channel(b, otw_matrix[2], column));
    }
@@ -403,7 +402,7 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
    }
    case nir_ray_query_value_intersection_world_to_object: {
      nir_def *wto_matrix[3];
-      radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
+      radv_load_wto_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), wto_matrix);
 
      nir_def *vals[3];
      for (unsigned i = 0; i < 3; ++i)
@@ -419,7 +418,7 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
      return rq_load(b, rq, origin);
    case nir_ray_query_value_intersection_triangle_vertex_positions: {
      nir_def *primitive_addr = isec_load(b, intersection, primitive_addr);
-      return radv_load_vertex_position(device, b, primitive_addr, nir_intrinsic_column(instr));
+      return radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(instr));
    }
    default:
      UNREACHABLE("Invalid nir_ray_query_value!");
@@ -429,7 +428,6 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
 }
 
 struct traversal_data {
-   const struct radv_device *device;
    struct ray_query_vars *vars;
    nir_deref_instr *rq;
 };
@@ -510,10 +508,8 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal
 
 static nir_def *
 lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_vars *vars, nir_deref_instr *rq,
-                 struct radv_device *device)
+                 const struct radv_compiler_info *compiler_info)
 {
-   struct radv_physical_device *pdev = radv_device_physical(device);
-
    nir_deref_instr *closest = rq_deref(b, rq, closest);
    nir_deref_instr *candidate = rq_deref(b, rq, candidate);
 
@@ -550,7 +546,6 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
    };
 
    struct traversal_data data = {
-      .device = device,
      .vars = vars,
      .rq = rq,
    };
@@ -590,10 +585,10 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
    nir_push_if(b, rq_load(b, rq, incomplete));
    {
      nir_def *incomplete;
-      if (pdev->cache_key.bvh8)
-         incomplete = radv_build_ray_traversal_gfx12(device, b, &args);
+      if (compiler_info->cache_key->bvh8)
+         incomplete = radv_build_ray_traversal_gfx12(compiler_info, b, &args);
      else
-         incomplete = radv_build_ray_traversal(device, b, &args);
+         incomplete = radv_build_ray_traversal(compiler_info, b, &args);
      rq_store(b, rq, incomplete, nir_iand(b, rq_load(b, rq, incomplete), incomplete));
    }
    nir_pop_if(b, NULL);
@@ -618,10 +613,8 @@ radv_lower_opaque_ray_query_deref(nir_builder *b, nir_deref_instr *opaque_deref,
 }
 
 bool
-radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device)
+radv_nir_lower_ray_queries(struct nir_shader *shader, const struct radv_compiler_info *compiler_info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
    bool progress = false;
 
    struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL);
@@ -629,7 +622,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
      if (!var->data.ray_query)
        continue;
 
-      lower_ray_query(shader, var, query_ht, pdev);
+      lower_ray_query(shader, var, query_ht, compiler_info);
 
      progress = true;
    }
@@ -641,7 +634,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
        if (!var->data.ray_query)
          continue;
 
-        lower_ray_query(shader, var, query_ht, pdev);
+        lower_ray_query(shader, var, query_ht, compiler_info);
 
        progress = true;
      }
@@ -676,13 +669,13 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
              lower_rq_generate_intersection(&builder, intrinsic, rq);
              break;
           case nir_intrinsic_rq_initialize:
-              lower_rq_initialize(&builder, intrinsic, vars, rq, device);
+              lower_rq_initialize(&builder, intrinsic, vars, rq, compiler_info);
              break;
           case nir_intrinsic_rq_load:
-              new_dest = lower_rq_load(device, &builder, intrinsic, rq);
+              new_dest = lower_rq_load(compiler_info, &builder, intrinsic, rq);
              break;
           case nir_intrinsic_rq_proceed:
-              new_dest = lower_rq_proceed(&builder, intrinsic, vars, rq, device);
+              new_dest = lower_rq_proceed(&builder, intrinsic, vars, rq, compiler_info);
              break;
           case nir_intrinsic_rq_terminate:
              lower_rq_terminate(&builder, intrinsic, rq);
diff --git a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c
index b3d25cde454..7335e3c4425 100644
--- a/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c
+++ b/src/amd/vulkan/nir/radv_nir_lower_vs_inputs.c
@@ -4,7 +4,6 @@
  * SPDX-License-Identifier: MIT
  */
 
-#include "ac_gpu_info.h"
 #include "ac_nir.h"
 #include "nir.h"
#include "nir_builder.h" @@ -18,7 +17,7 @@ typedef struct { const struct radv_shader_args *args; const struct radv_shader_info *info; const struct radv_graphics_state_key *gfx_state; - const struct radeon_info *gpu_info; + const struct radv_compiler_info *compiler_info; } lower_vs_inputs_state; static nir_def * @@ -271,12 +270,13 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs const enum pipe_format attrib_format = adjust_format(s->gfx_state->vi.vertex_attribute_formats[location]); const struct util_format_description *f = util_format_description(attrib_format); const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info( - s->gpu_info->gfx_level, s->gpu_info->compiler_info.has_vtx_format_alpha_adjust_bug, attrib_format); + s->compiler_info->ac->gfx_level, s->compiler_info->ac->has_vtx_format_alpha_adjust_bug, attrib_format); const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding; const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & BITFIELD_MASK(binding_index)); nir_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers); - nir_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->gpu_info->address32_hi)); + nir_def *vertex_buffers = + nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->compiler_info->hw.address32_hi)); nir_def *descriptor = ac_nir_load_smem(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16), 4, ACCESS_CAN_SPECULATE); nir_def *base_index = calc_vs_input_index(b, location, s); @@ -453,8 +453,8 @@ lower_vs_input_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state) } bool -radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_stage, - const struct radv_graphics_state_key *gfx_state, const struct radeon_info *gpu_info) +radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_compiler_info *compiler_info, + const struct radv_shader_stage *vs_stage, const struct radv_graphics_state_key *gfx_state) { assert(shader->info.stage == MESA_SHADER_VERTEX); @@ -462,7 +462,7 @@ radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_ .info = &vs_stage->info, .args = &vs_stage->args, .gfx_state = gfx_state, - .gpu_info = gpu_info, + .compiler_info = compiler_info, }; return nir_shader_intrinsics_pass(shader, lower_vs_input_instr, nir_metadata_control_flow, &state); diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.c b/src/amd/vulkan/nir/radv_nir_rt_common.c index e1911960e67..3065d92a126 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.c +++ b/src/amd/vulkan/nir/radv_nir_rt_common.c @@ -9,18 +9,19 @@ #include "nir_builder.h" #include "radv_debug.h" -static nir_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and); +static nir_def *build_node_to_addr(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *node, bool skip_type_and); bool -radv_use_bvh_stack_rtn(const struct radv_physical_device *pdevice) +radv_use_bvh_stack_rtn(const struct radv_compiler_info *compiler_info) { /* gfx12 requires using the bvh4 ds_bvh_stack_rtn differently - enable hw stack instrs on gfx12 only with bvh8 */ - return ((pdevice->info.gfx_level >= GFX11 && pdevice->info.gfx_level < GFX12) || pdevice->cache_key.bvh8) && - !pdevice->cache_key.emulate_rt; + return ((compiler_info->ac->gfx_level >= GFX11 && compiler_info->ac->gfx_level < GFX12) || + compiler_info->cache_key->bvh8) && + 
!compiler_info->cache_key->emulate_rt; } nir_def * -radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_physical_device *pdev, uint32_t workgroup_size, +radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_compiler_info *compiler_info, uint32_t workgroup_size, uint32_t stack_base, uint32_t max_stack_entries) { assert(stack_base % 4 == 0); @@ -41,7 +42,7 @@ radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct r } stack_idx = nir_iadd_imm(b, stack_idx, stack_base / 4); /* There are 4 bytes in each stack entry so no further arithmetic is needed. */ - if (pdev->info.gfx_level >= GFX12) + if (compiler_info->ac->gfx_level >= GFX12) stack_idx = nir_ishl_imm(b, stack_idx, 15); else stack_idx = nir_ishl_imm(b, stack_idx, 18); @@ -72,7 +73,7 @@ nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var } static nir_def * -intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax, +intersect_ray_amd_software_box(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax, nir_def *origin, nir_def *dir, nir_def *inv_dir) { const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); @@ -81,7 +82,7 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_d unsigned old_math_ctrl = b->fp_math_ctrl; b->fp_math_ctrl |= nir_fp_exact | nir_fp_preserve_nan | nir_fp_preserve_inf; - nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false); + nir_def *node_addr = build_node_to_addr(compiler_info, b, bvh_node, false); /* vec4 distances = vec4(INF, INF, INF, INF); */ nir_variable *distances = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances"); @@ -190,7 +191,7 @@ radv_build_intersect_vertex(nir_builder *b, nir_def *v0_x, nir_def *v1_x, nir_de } static nir_def * -intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax, +intersect_ray_amd_software_tri(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax, nir_def *origin, nir_def *dir, nir_def *inv_dir) { const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4); @@ -198,7 +199,7 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_d unsigned old_math_ctrl = b->fp_math_ctrl; b->fp_math_ctrl |= nir_fp_exact | nir_fp_preserve_nan | nir_fp_preserve_inf; - nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false); + nir_def *node_addr = build_node_to_addr(compiler_info, b, bvh_node, false); const uint32_t coord_offsets[3] = { offsetof(struct radv_bvh_triangle_node, coords[0]), @@ -379,15 +380,13 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_d } nir_def * -build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, nir_def *flags) +build_addr_to_node(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *addr, nir_def *flags) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const uint64_t bvh_size = 1ull << 42; nir_def *node = nir_ushr_imm(b, addr, 3); node = nir_iand_imm(b, node, (bvh_size - 1) << 3); - if (pdev->cache_key.bvh8) { + if (compiler_info->cache_key->bvh8) { /* The HW ray flags are the same bits as the API flags. * - SpvRayFlagsTerminateOnFirstHitKHRMask, SpvRayFlagsSkipClosestHitShaderKHRMask are handled in shader code. 
* - SpvRayFlagsSkipTrianglesKHRMask, SpvRayFlagsSkipAABBsKHRMask do not work. @@ -403,14 +402,13 @@ build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, ni } static nir_def * -build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and) +build_node_to_addr(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *node, bool skip_type_and) { - const struct radv_physical_device *pdev = radv_device_physical(device); nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull); addr = nir_ishl_imm(b, addr, 3); /* Assumes everything is in the top half of address space, which is true in * GFX9+ for now. */ - return pdev->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr; + return compiler_info->ac->gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr; } nir_def * @@ -431,11 +429,9 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool tr } nir_def * -radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *primitive_addr, uint32_t index) +radv_load_vertex_position(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *primitive_addr, uint32_t index) { - const struct radv_physical_device *pdev = radv_device_physical(device); - - if (pdev->cache_key.bvh8) { + if (compiler_info->cache_key->bvh8) { /* Assume that vertices are uncompressed. */ uint32_t offset = ROUND_DOWN_TO(RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE / 8, 4) + index * 3 * sizeof(float); nir_def *data[4]; @@ -461,12 +457,10 @@ radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *p } void -radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out) +radv_load_wto_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out) { - const struct radv_physical_device *pdev = radv_device_physical(device); - unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix); - if (pdev->cache_key.bvh8) + if (compiler_info->cache_key->bvh8) offset = offsetof(struct radv_gfx12_instance_node, wto_matrix); for (unsigned i = 0; i < 3; ++i) { @@ -476,12 +470,10 @@ radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instan } void -radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out) +radv_load_otw_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out) { - const struct radv_physical_device *pdev = radv_device_physical(device); - unsigned offset = offsetof(struct radv_bvh_instance_node, otw_matrix); - if (pdev->cache_key.bvh8) + if (compiler_info->cache_key->bvh8) offset = sizeof(struct radv_gfx12_instance_node) + offsetof(struct radv_gfx12_instance_node_user_data, otw_matrix); @@ -492,11 +484,9 @@ radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instan } nir_def * -radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *instance_addr) +radv_load_custom_instance(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr) { - const struct radv_physical_device *pdev = radv_device_physical(device); - - if (pdev->cache_key.bvh8) { + if (compiler_info->cache_key->bvh8) { return nir_load_global(b, 1, 32, nir_iadd_imm(b, instance_addr, sizeof(struct radv_gfx12_instance_node) + @@ -511,11 +501,9 @@ radv_load_custom_instance(struct radv_device *device, nir_builder *b, 
nir_def *i } nir_def * -radv_load_instance_id(struct radv_device *device, nir_builder *b, nir_def *instance_addr) +radv_load_instance_id(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr) { - const struct radv_physical_device *pdev = radv_device_physical(device); - - if (pdev->cache_key.bvh8) { + if (compiler_info->cache_key->bvh8) { return nir_load_global(b, 1, 32, nir_iadd_imm(b, instance_addr, sizeof(struct radv_gfx12_instance_node) + @@ -540,14 +528,14 @@ hit_is_opaque(nir_builder *b, nir_def *sbt_offset_and_flags, const struct radv_r } static nir_def * -create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, struct radv_ray_flags *ray_flags) +create_bvh_descriptor(nir_builder *b, const struct radv_compiler_info *compiler_info, struct radv_ray_flags *ray_flags) { /* We create a BVH descriptor that covers the entire memory range. That way we can always * use the same descriptor, which avoids divergence when different rays hit different * instances at the cost of having to use 64-bit node ids. */ const uint64_t bvh_size = 1ull << 42; - const uint32_t sort_triangles_first = pdev->cache_key.bvh8 ? BITFIELD_BIT(52 - 32) : 0; + const uint32_t sort_triangles_first = compiler_info->cache_key->bvh8 ? BITFIELD_BIT(52 - 32) : 0; const uint32_t box_sort_enable = BITFIELD_BIT(63 - 32); const uint32_t triangle_return_mode = BITFIELD_BIT(120 - 96); /* Return IJ for triangles */ @@ -556,7 +544,7 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s uint32_t dword2 = (bvh_size - 1) & 0xFFFFFFFFu; uint32_t dword3 = ((bvh_size - 1) >> 32) | triangle_return_mode | (1u << 31); - if (pdev->info.gfx_level >= GFX11) { + if (compiler_info->ac->gfx_level >= GFX11) { /* Enable pointer flags on GFX11+ */ dword3 |= BITFIELD_BIT(119 - 96); @@ -570,7 +558,7 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s nir_imm_int(b, (box_sort_largest << 21) | sort_triangles_first | box_sort_enable), dword1); } - if (pdev->cache_key.bvh8) { + if (compiler_info->cache_key->bvh8) { /* compressed_format_en */ dword3 |= BITFIELD_BIT(115 - 96); /* wide_sort_en */ @@ -584,10 +572,9 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s } static void -insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args, +insert_traversal_triangle_case(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags, nir_def *result, nir_def *bvh_node) { - struct radv_physical_device *pdev = radv_device_physical(device); if (!args->triangle_cb) return; @@ -602,7 +589,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const { intersection.frontface = nir_fgt_imm(b, div, 0); nir_def *not_cull; - if (pdev->info.gfx_level < GFX11 || pdev->cache_key.emulate_rt) { + if (compiler_info->ac->gfx_level < GFX11 || compiler_info->cache_key->emulate_rt) { nir_def *switch_ccw = nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), RADV_INSTANCE_TRIANGLE_FLIP_FACING); intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw); @@ -623,7 +610,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const nir_flt(b, args->tmin, intersection.t), not_cull)); { - intersection.base.node_addr = build_node_to_addr(device, b, bvh_node, false); + intersection.base.node_addr = 
build_node_to_addr(compiler_info, b, bvh_node, false); nir_def *triangle_info = nir_load_global( b, 2, 32, nir_iadd_imm(b, intersection.base.node_addr, offsetof(struct radv_bvh_triangle_node, triangle_id))); @@ -648,7 +635,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const } static void -insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b, +insert_traversal_triangle_case_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags, nir_variable *intrinsic_result, nir_def *result, nir_def *global_bvh_node, nir_def *bvh_node) @@ -694,7 +681,7 @@ insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b, nir_def *dword3 = nir_bcsel(b, triangle0, nir_channel(b, result, 3), nir_channel(b, result, 7)); intersection.frontface = nir_inot(b, nir_test_mask(b, dword3, 1)); - intersection.base.node_addr = build_node_to_addr(device, b, global_bvh_node, false); + intersection.base.node_addr = build_node_to_addr(compiler_info, b, global_bvh_node, false); intersection.base.primitive_id = nir_ishr_imm(b, dword3, 1); intersection.base.geometry_id_and_flags = nir_ishr_imm(b, nir_bcsel(b, triangle0, nir_channel(b, result, 8), nir_channel(b, result, 9)), 2); @@ -711,7 +698,7 @@ insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b, } static void -insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args, +insert_traversal_aabb_case(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags, nir_def *bvh_node) { if (!args->aabb_cb) @@ -720,7 +707,7 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const str nir_push_if(b, ray_flags->no_skip_aabbs); { struct radv_leaf_intersection intersection; - intersection.node_addr = build_node_to_addr(device, b, bvh_node, false); + intersection.node_addr = build_node_to_addr(compiler_info, b, bvh_node, false); nir_def *triangle_info = nir_load_global( b, 2, 32, nir_iadd_imm(b, intersection.node_addr, offsetof(struct radv_bvh_aabb_node, primitive_id))); intersection.primitive_id = nir_channel(b, triangle_info, 0); @@ -738,14 +725,14 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const str } static void -insert_traversal_aabb_case_gfx12(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args, +insert_traversal_aabb_case_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags, nir_def *result, nir_def *bvh_node) { if (!args->aabb_cb) return; struct radv_leaf_intersection intersection; - intersection.node_addr = build_node_to_addr(device, b, bvh_node, false); + intersection.node_addr = build_node_to_addr(compiler_info, b, bvh_node, false); intersection.primitive_id = nir_ishr_imm(b, nir_channel(b, result, 3), 1); intersection.geometry_id_and_flags = nir_ishr_imm(b, nir_channel(b, result, 8), 2); intersection.opaque = nir_inot(b, nir_test_mask(b, nir_channel(b, result, 2), 1u << 31)); @@ -758,10 +745,9 @@ insert_traversal_aabb_case_gfx12(struct radv_device *device, nir_builder *b, con } static nir_def * -fetch_parent_node(struct radv_device *device, nir_builder *b, nir_def *bvh, nir_def *node) +fetch_parent_node(const struct radv_compiler_info *compiler_info, 
nir_builder *b, nir_def *bvh, nir_def *node) { - const struct radv_physical_device *pdev = radv_device_physical(device); - nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, pdev->cache_key.bvh8 ? 16 : 8), 4), 4); + nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, compiler_info->cache_key->bvh8 ? 16 : 8), 4), 4); return nir_load_global(b, 1, 32, nir_isub(b, bvh, nir_u2u64(b, offset)), .align_mul = 4); } @@ -780,10 +766,10 @@ radv_test_flag(nir_builder *b, const struct radv_ray_traversal_args *args, uint3 } static nir_def * -build_bvh_base(nir_builder *b, const struct radv_physical_device *pdev, nir_def *base_addr, nir_def *ptr_flags, +build_bvh_base(nir_builder *b, const struct radv_compiler_info *compiler_info, nir_def *base_addr, nir_def *ptr_flags, bool overwrite) { - if (pdev->info.gfx_level < GFX11 || pdev->cache_key.emulate_rt) + if (compiler_info->ac->gfx_level < GFX11 || compiler_info->cache_key->emulate_rt) return base_addr; nir_def *base_addr_vec = nir_unpack_64_2x32(b, base_addr); @@ -796,7 +782,7 @@ build_bvh_base(nir_builder *b, const struct radv_physical_device *pdev, nir_def } static void -build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, const struct radv_ray_traversal_args *args, +build_instance_exit(nir_builder *b, const struct radv_compiler_info *compiler_info, const struct radv_ray_traversal_args *args, nir_def *stack_instance_exit, nir_def *ptr_flags) { nir_def *root_instance_exit = nir_iand( @@ -805,7 +791,7 @@ build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, con nir_if *instance_exit = nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit)); instance_exit->control = nir_selection_control_dont_flatten; { - if (pdev->cache_key.bvh8 && args->use_bvh_stack_rtn) + if (compiler_info->cache_key->bvh8 && args->use_bvh_stack_rtn) nir_store_deref(b, args->vars.stack, nir_ior_imm(b, nir_load_deref(b, args->vars.stack), RADV_BVH_STACK_FLAG_TLAS_POP), 0x1); else @@ -814,7 +800,7 @@ build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, con nir_store_deref(b, args->vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1); nir_def *root_bvh_base = - pdev->cache_key.bvh8 ? args->root_bvh_base : build_bvh_base(b, pdev, args->root_bvh_base, ptr_flags, true); + compiler_info->cache_key->bvh8 ? 
args->root_bvh_base : build_bvh_base(b, compiler_info, args->root_bvh_base, ptr_flags, true); nir_store_deref(b, args->vars.bvh_base, root_bvh_base, 0x1); nir_store_deref(b, args->vars.origin, args->origin, 7); @@ -825,9 +811,8 @@ build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, con } nir_def * -radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args) +radv_build_ray_traversal(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args) { - const struct radv_physical_device *pdev = radv_device_physical(device); nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete"); nir_store_var(b, incomplete, nir_imm_true(b), 0x1); nir_variable *intrinsic_result = nir_local_variable_create(b->impl, glsl_uvec4_type(), "intrinsic_result"); @@ -849,9 +834,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_iand_imm(b, args->flags, ~(SpvRayFlagsTerminateOnFirstHitKHRMask | SpvRayFlagsSkipClosestHitShaderKHRMask)); nir_store_deref(b, args->vars.bvh_base, - build_bvh_base(b, pdev, nir_load_deref(b, args->vars.bvh_base), ptr_flags, true), 0x1); + build_bvh_base(b, compiler_info, nir_load_deref(b, args->vars.bvh_base), ptr_flags, true), 0x1); - nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags); + nir_def *desc = create_bvh_descriptor(b, compiler_info, &ray_flags); nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0); nir_loop *loop = nir_push_loop(b); @@ -868,7 +853,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_jump(b, nir_jump_break); } nir_pop_if(b, NULL); - build_instance_exit(b, pdev, args, + build_instance_exit(b, compiler_info, args, nir_ilt(b, nir_load_deref(b, args->vars.stack), nir_load_deref(b, args->vars.top_stack)), ptr_flags); } @@ -885,7 +870,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc } nir_pop_if(b, NULL); build_instance_exit( - b, pdev, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)), + b, compiler_info, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)), ptr_flags); } @@ -901,9 +886,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc if (args->use_bvh_stack_rtn) nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), 1), 0x1); nir_def *prev = nir_load_deref(b, args->vars.previous_node); - nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true); + nir_def *bvh_addr = build_node_to_addr(compiler_info, b, nir_load_deref(b, args->vars.bvh_base), true); - nir_def *parent = fetch_parent_node(device, b, bvh_addr, prev); + nir_def *parent = fetch_parent_node(compiler_info, b, bvh_addr, prev); nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE)); { nir_store_var(b, incomplete, nir_imm_false(b), 0x1); @@ -945,7 +930,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node)); bool has_result = false; - if (pdev->info.compiler_info.has_image_bvh_intersect_ray && !pdev->cache_key.emulate_rt) { + if (compiler_info->ac->has_image_bvh_intersect_ray && !compiler_info->cache_key->emulate_rt) { nir_store_var( b, intrinsic_result, nir_bvh64_intersect_ray_amd(b, 32, desc, 
nir_unpack_64_2x32(b, global_bvh_node), @@ -961,7 +946,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc { nir_push_if(b, nir_test_mask(b, bvh_node, BITFIELD64_BIT(ffs(radv_bvh_node_aabb) - 1))); { - insert_traversal_aabb_case(device, b, args, &ray_flags, global_bvh_node); + insert_traversal_aabb_case(compiler_info, b, args, &ray_flags, global_bvh_node); } nir_push_else(b, NULL); { @@ -972,14 +957,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc } /* instance */ - nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false); + nir_def *instance_node_addr = build_node_to_addr(compiler_info, b, global_bvh_node, false); nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1); nir_def *instance_data = nir_load_global(b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0); nir_def *wto_matrix[3]; - radv_load_wto_matrix(device, b, instance_node_addr, wto_matrix); + radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix); nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1); @@ -1006,7 +991,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc nir_def *instance_pointer = nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2)); instance_pointer = nir_iand(b, instance_pointer, instance_flag_mask); - nir_store_deref(b, args->vars.bvh_base, build_bvh_base(b, pdev, instance_pointer, ptr_flags, false), + nir_store_deref(b, args->vars.bvh_base, build_bvh_base(b, compiler_info, instance_pointer, ptr_flags, false), 0x1); /* Push the instance root node onto the stack */ @@ -1040,7 +1025,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc /* If we didn't run the intrinsic cause the hardware didn't support it, * emulate ray/box intersection here */ result = intersect_ray_amd_software_box( - device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), + compiler_info, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir)); } @@ -1097,10 +1082,10 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc /* If we didn't run the intrinsic cause the hardware didn't support it, * emulate ray/tri intersection here */ result = intersect_ray_amd_software_tri( - device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), + compiler_info, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin), nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir)); } - insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node); + insert_traversal_triangle_case(compiler_info, b, args, &ray_flags, result, global_bvh_node); } nir_pop_if(b, NULL); @@ -1131,10 +1116,8 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc } nir_def * -radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args) +radv_build_ray_traversal_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args) { - const struct radv_physical_device *pdev = radv_device_physical(device); - nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete"); nir_store_var(b, 
incomplete, nir_imm_true(b), 0x1); nir_variable *intrinsic_result = nir_local_variable_create(b->impl, glsl_uvec_type(8), "intrinsic_result"); @@ -1152,7 +1135,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const .no_skip_aabbs = radv_test_flag(b, args, SpvRayFlagsSkipAABBsKHRMask, false), }; - nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags); + nir_def *desc = create_bvh_descriptor(b, compiler_info, &ray_flags); nir_loop *loop = nir_push_loop(b); { @@ -1168,7 +1151,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const nir_jump(b, nir_jump_break); } nir_pop_if(b, NULL); - build_instance_exit(b, pdev, args, + build_instance_exit(b, compiler_info, args, nir_test_mask(b, nir_load_deref(b, args->vars.stack), RADV_BVH_STACK_FLAG_TLAS_POP), NULL); } @@ -1184,7 +1167,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const } nir_pop_if(b, NULL); build_instance_exit( - b, pdev, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)), + b, compiler_info, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)), NULL); } @@ -1210,7 +1193,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const nir_imm_int(b, (int32_t)offsetof(struct radv_gfx12_box_node, parent_id) - (radv_bvh_node_box32 << 3))); nir_def *offset = nir_iadd(b, nir_ishl_imm(b, prev, 3), field_offset); - nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true); + nir_def *bvh_addr = build_node_to_addr(compiler_info, b, nir_load_deref(b, args->vars.bvh_base), true); loaded_parent_id = nir_load_global(b, 1, 32, nir_iadd(b, bvh_addr, nir_u2u64(b, offset))); } nir_push_else(b, NULL); @@ -1294,7 +1277,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const nir_push_else(b, NULL); { /* instance */ - nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false); + nir_def *instance_node_addr = build_node_to_addr(compiler_info, b, global_bvh_node, false); nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1); nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, result, 6), 1); @@ -1377,13 +1360,13 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const nir_push_if(b, nir_test_mask(b, nir_channel(b, result, 1), 1u << 31)); { nir_push_if(b, ray_flags.no_skip_aabbs); - insert_traversal_aabb_case_gfx12(device, b, args, &ray_flags, result, global_bvh_node); + insert_traversal_aabb_case_gfx12(compiler_info, b, args, &ray_flags, result, global_bvh_node); nir_pop_if(b, NULL); } nir_push_else(b, NULL); { nir_push_if(b, ray_flags.no_skip_triangles); - insert_traversal_triangle_case_gfx12(device, b, args, &ray_flags, intrinsic_result, result, global_bvh_node, + insert_traversal_triangle_case_gfx12(compiler_info, b, args, &ray_flags, intrinsic_result, result, global_bvh_node, bvh_node); nir_pop_if(b, NULL); } diff --git a/src/amd/vulkan/nir/radv_nir_rt_common.h b/src/amd/vulkan/nir/radv_nir_rt_common.h index e48bdc07995..0442b2f63b7 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_common.h +++ b/src/amd/vulkan/nir/radv_nir_rt_common.h @@ -12,27 +12,26 @@ #include "compiler/spirv/spirv.h" -struct radv_device; -struct radv_physical_device; +struct radv_compiler_info; -bool radv_use_bvh_stack_rtn(const struct radv_physical_device *pdevice); +bool radv_use_bvh_stack_rtn(const struct radv_compiler_info 
*compiler_info); -nir_def *radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_physical_device *pdev, +nir_def *radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_compiler_info *compiler_info, uint32_t workgroup_size, uint32_t stack_base, uint32_t max_stack_entries); -nir_def *build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, nir_def *flags); +nir_def *build_addr_to_node(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *addr, nir_def *flags); nir_def *nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation); -nir_def *radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *primitive_addr, uint32_t index); +nir_def *radv_load_vertex_position(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *primitive_addr, uint32_t index); -void radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out); +void radv_load_wto_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out); -void radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out); +void radv_load_otw_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out); -nir_def *radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *instance_addr); +nir_def *radv_load_custom_instance(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr); -nir_def *radv_load_instance_id(struct radv_device *device, nir_builder *b, nir_def *instance_addr); +nir_def *radv_load_instance_id(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr); struct radv_ray_traversal_args; @@ -162,10 +161,10 @@ struct radv_ray_traversal_args { * rayQueryProceedEXT. Traversal will only be considered incomplete, if one of the specified * callbacks breaks out of the traversal loop. 
*/ -nir_def *radv_build_ray_traversal(struct radv_device *device, nir_builder *b, +nir_def *radv_build_ray_traversal(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args); -nir_def *radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, +nir_def *radv_build_ray_traversal_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args); #endif /* RADV_NIR_RT_COMMON_H */ diff --git a/src/amd/vulkan/nir/radv_nir_rt_stage_common.c b/src/amd/vulkan/nir/radv_nir_rt_stage_common.c index 8ab0c07885a..b3c22bc3bce 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_stage_common.c +++ b/src/amd/vulkan/nir/radv_nir_rt_stage_common.c @@ -257,11 +257,9 @@ radv_nir_return_param_from_type(nir_parameter *param, const glsl_type *type, boo } void -radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage, bool uses_descriptor_heap, - struct radv_shader_debug_info *debug) +radv_build_rt_prolog(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage, + bool uses_descriptor_heap, struct radv_shader_debug_info *debug) { - const struct radv_physical_device *pdev = radv_device_physical(device); - nir_builder b = radv_meta_nir_init_shader(MESA_SHADER_COMPUTE, "rt_prolog"); stage->stage = MESA_SHADER_COMPUTE; stage->nir = b.shader; @@ -270,24 +268,25 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage stage->info.loads_dynamic_offsets = true; stage->info.force_indirect_descriptors = true; stage->info.descriptor_heap = uses_descriptor_heap; - stage->info.wave_size = pdev->rt_wave_size; + stage->info.wave_size = compiler_info->rt_wave_size; stage->info.workgroup_size = stage->info.wave_size; stage->info.user_data_0 = R_00B900_COMPUTE_USER_DATA_0; stage->info.type = RADV_SHADER_TYPE_RT_PROLOG; - stage->info.cs.block_size[0] = pdev->rt_wave_size; + stage->info.cs.block_size[0] = compiler_info->rt_wave_size; stage->info.cs.block_size[1] = 1; stage->info.cs.block_size[2] = 1; stage->info.cs.uses_thread_id[0] = true; for (unsigned i = 0; i < 3; i++) stage->info.cs.uses_block_id[i] = true; - radv_declare_shader_args(device, NULL, &stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &stage->args, debug); + radv_declare_shader_args(compiler_info, NULL, &stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &stage->args, + debug); stage->info.user_sgprs_locs = stage->args.user_sgprs_locs; - b.shader->info.workgroup_size[0] = pdev->rt_wave_size; - b.shader->info.api_subgroup_size = pdev->rt_wave_size; - b.shader->info.max_subgroup_size = pdev->rt_wave_size; - b.shader->info.min_subgroup_size = pdev->rt_wave_size; + b.shader->info.workgroup_size[0] = compiler_info->rt_wave_size; + b.shader->info.api_subgroup_size = compiler_info->rt_wave_size; + b.shader->info.max_subgroup_size = compiler_info->rt_wave_size; + b.shader->info.min_subgroup_size = compiler_info->rt_wave_size; nir_function *raygen_function = nir_function_create(b.shader, "raygen_func"); radv_nir_init_rt_function_params(raygen_function, MESA_SHADER_RAYGEN, 0, 0, uses_descriptor_heap); @@ -306,7 +305,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage nir_def *launch_size_addr = nir_pack_64_2x32(&b, ac_nir_load_arg(&b, &stage->args.ac, stage->args.ac.rt.launch_size_addr)); nir_def *traversal_addr = nir_pack_64_2x32_split(&b, ac_nir_load_arg(&b, &stage->args.ac, stage->args.ac.rt.traversal_shader_addr), - nir_imm_int(&b, pdev->info.address32_hi)); 
+ nir_imm_int(&b, compiler_info->hw.address32_hi)); nir_def *raygen_sbt = nir_pack_64_2x32(&b, ac_nir_load_smem(&b, 2, sbt_desc, nir_imm_int(&b, 0), 4, 0)); nir_def *launch_sizes = ac_nir_load_smem(&b, 3, launch_size_addr, nir_imm_int(&b, 0), 4, 0); @@ -320,7 +319,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage nir_def *local_id = nir_channel(&b, nir_load_local_invocation_id(&b), 0); - nir_def *unswizzled_id_x = nir_iadd(&b, nir_imul_imm(&b, wg_ids[0], pdev->rt_wave_size), local_id); + nir_def *unswizzled_id_x = nir_iadd(&b, nir_imul_imm(&b, wg_ids[0], compiler_info->rt_wave_size), local_id); nir_def *unswizzled_id_y = wg_ids[1]; /* Swizzle ray launch IDs. We dispatch a 1D 32x1/64x1 workgroup natively. Many games dispatch @@ -366,7 +365,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage swizzled_id_y = nir_bitfield_select(&b, nir_imm_int(&b, 0x3), swizzled_id_y, swizzled_id_shifted_y); uint32_t workgroup_width = 8; - uint32_t workgroup_height = pdev->rt_wave_size == 32 ? 4 : 8; + uint32_t workgroup_height = compiler_info->rt_wave_size == 32 ? 4 : 8; uint32_t workgroup_height_mask = workgroup_height - 1; /* Fix up the workgroup IDs after converting from 32x1/64x1 to 8x4/8x8. The X dimension of the @@ -376,7 +375,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage * the fact we divided the X component of the ID. */ nir_def *wg_id_y_rem = nir_iand_imm(&b, wg_ids[1], workgroup_height_mask); - nir_def *new_wg_start_x = nir_imul_imm(&b, wg_ids[0], pdev->rt_wave_size); + nir_def *new_wg_start_x = nir_imul_imm(&b, wg_ids[0], compiler_info->rt_wave_size); new_wg_start_x = nir_iadd(&b, new_wg_start_x, nir_imul_imm(&b, wg_id_y_rem, workgroup_width)); nir_def *new_wg_start_y = nir_iand_imm(&b, wg_ids[1], ~workgroup_height_mask); @@ -393,7 +392,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage /* If parts of this wave would've exceeded the launch size in the X dimension, their threads will be masked out and * exec won't equal -1. In that case, using swizzled IDs is invalid. 
*/ - nir_def *partial_oob_x = nir_ine_imm(&b, nir_ballot(&b, 1, pdev->rt_wave_size, nir_imm_true(&b)), -1); + nir_def *partial_oob_x = nir_ine_imm(&b, nir_ballot(&b, 1, compiler_info->rt_wave_size, nir_imm_true(&b)), -1); nir_def *partial_oob_y = nir_uge(&b, wg_ids[1], y_wg_bound); nir_def *partial_oob = nir_ior(&b, partial_oob_x, partial_oob_y); diff --git a/src/amd/vulkan/nir/radv_nir_rt_stage_common.h b/src/amd/vulkan/nir/radv_nir_rt_stage_common.h index da9099fecf0..fb69582f4a9 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_stage_common.h +++ b/src/amd/vulkan/nir/radv_nir_rt_stage_common.h @@ -41,7 +41,7 @@ typedef struct glsl_type glsl_type; */ struct radv_rt_case_data { - struct radv_device *device; + const struct radv_compiler_info *compiler_info; struct radv_ray_tracing_pipeline *pipeline; void *param_data; }; @@ -154,12 +154,12 @@ struct radv_nir_rt_traversal_result { nir_variable *barycentrics; }; -struct radv_nir_rt_traversal_result radv_build_traversal(struct radv_device *device, +struct radv_nir_rt_traversal_result radv_build_traversal(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline, nir_builder *b, struct radv_nir_rt_traversal_params *params, struct radv_ray_tracing_stage_info *info); -void radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage, bool uses_descriptor_heap, - struct radv_shader_debug_info *debug); +void radv_build_rt_prolog(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage, + bool uses_descriptor_heap, struct radv_shader_debug_info *debug); #endif // MESA_RADV_NIR_RT_STAGE_COMMON_H diff --git a/src/amd/vulkan/nir/radv_nir_rt_stage_cps.c b/src/amd/vulkan/nir/radv_nir_rt_stage_cps.c index 27873502a30..e73b0110120 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_stage_cps.c +++ b/src/amd/vulkan/nir/radv_nir_rt_stage_cps.c @@ -15,9 +15,7 @@ #include "ac_nir.h" #include "aco_nir_call_attribs.h" -#include "radv_device.h" #include "radv_nir_rt_stage_functions.h" -#include "radv_physical_device.h" #include "radv_pipeline_rt.h" #include "radv_shader.h" @@ -69,7 +67,7 @@ radv_gather_unused_args(struct radv_ray_tracing_stage_info *info, nir_shader *ni * Global variables for an RT pipeline */ struct rt_variables { - struct radv_device *device; + const struct radv_compiler_info *compiler_info; const VkPipelineCreateFlags2 flags; nir_variable *shader_addr; @@ -107,10 +105,11 @@ struct rt_variables { }; static struct rt_variables -create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipelineCreateFlags2 flags) +create_rt_variables(nir_shader *shader, const struct radv_compiler_info *compiler_info, + const VkPipelineCreateFlags2 flags) { struct rt_variables vars = { - .device = device, + .compiler_info = compiler_info, .flags = flags, }; vars.shader_addr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_addr"); @@ -184,6 +183,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data) struct radv_lower_rt_instruction_data *data = _data; struct rt_variables *vars = data->vars; + const struct radv_compiler_info *compiler_info = vars->compiler_info; b->cursor = nir_before_instr(&intr->instr); @@ -297,7 +297,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data) break; } case nir_intrinsic_load_ray_instance_custom_index: { - ret = radv_load_custom_instance(vars->device, b, nir_load_var(b, vars->instance_addr)); + ret = radv_load_custom_instance(compiler_info, b, nir_load_var(b, 
vars->instance_addr)); break; } case nir_intrinsic_load_primitive_id: { @@ -310,7 +310,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data) break; } case nir_intrinsic_load_instance_id: { - ret = radv_load_instance_id(vars->device, b, nir_load_var(b, vars->instance_addr)); + ret = radv_load_instance_id(compiler_info, b, nir_load_var(b, vars->instance_addr)); break; } case nir_intrinsic_load_ray_flags: { @@ -325,7 +325,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data) unsigned c = nir_intrinsic_column(intr); nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr); nir_def *wto_matrix[3]; - radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix); + radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix); nir_def *vals[3]; for (unsigned i = 0; i < 3; ++i) @@ -337,20 +337,20 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data) case nir_intrinsic_load_ray_object_to_world: { unsigned c = nir_intrinsic_column(intr); nir_def *otw_matrix[3]; - radv_load_otw_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), otw_matrix); + radv_load_otw_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), otw_matrix); ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c), nir_channel(b, otw_matrix[2], c)); break; } case nir_intrinsic_load_ray_object_origin: { nir_def *wto_matrix[3]; - radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix); + radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix); ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->origin), wto_matrix, true); break; } case nir_intrinsic_load_ray_object_direction: { nir_def *wto_matrix[3]; - radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix); + radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix); ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->direction), wto_matrix, false); break; } @@ -430,7 +430,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data) } case nir_intrinsic_load_ray_triangle_vertex_positions: { nir_def *primitive_addr = nir_load_var(b, vars->primitive_addr); - ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr)); + ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr)); break; } default: @@ -554,7 +554,7 @@ init_cps_function(nir_function *function, bool has_position_fetch, bool uses_des void radv_nir_lower_rt_abi_cps(nir_shader *shader, const struct radv_shader_info *info, bool resume_shader, - struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, + const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline, bool has_position_fetch, const struct radv_ray_tracing_stage_info *traversal_info) { const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; @@ -574,7 +574,7 @@ radv_nir_lower_rt_abi_cps(nir_shader *shader, const struct radv_shader_info *inf impl->function->params[idx].driver_attributes |= ACO_NIR_PARAM_ATTRIB_DISCARDABLE; } - struct rt_variables vars = create_rt_variables(shader, device, pipeline->base.base.create_flags); + struct rt_variables vars = create_rt_variables(shader, compiler_info, pipeline->base.base.create_flags); struct radv_rt_shader_info rt_info = {0}; diff --git 
a/src/amd/vulkan/nir/radv_nir_rt_stage_cps.h b/src/amd/vulkan/nir/radv_nir_rt_stage_cps.h index 5c776137b0d..b6b799a7bef 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_stage_cps.h +++ b/src/amd/vulkan/nir/radv_nir_rt_stage_cps.h @@ -14,8 +14,9 @@ void radv_gather_unused_args(struct radv_ray_tracing_stage_info *info, nir_shader *nir); void radv_nir_lower_rt_abi_cps(nir_shader *shader, const struct radv_shader_info *info, bool resume_shader, - struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, - bool has_position_fetch, const struct radv_ray_tracing_stage_info *traversal_info); + const struct radv_compiler_info *compiler_info, + struct radv_ray_tracing_pipeline *pipeline, bool has_position_fetch, + const struct radv_ray_tracing_stage_info *traversal_info); void radv_nir_lower_rt_io_cps(nir_shader *shader); #endif // RADV_NIR_RT_STAGE_CPS_H diff --git a/src/amd/vulkan/nir/radv_nir_rt_stage_functions.c b/src/amd/vulkan/nir/radv_nir_rt_stage_functions.c index eb319588b7d..14cb05d5f30 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_stage_functions.c +++ b/src/amd/vulkan/nir/radv_nir_rt_stage_functions.c @@ -13,8 +13,6 @@ #include "nir/radv_nir_rt_stage_common.h" #include "nir/radv_nir_rt_stage_functions.h" -#include "radv_device.h" -#include "radv_physical_device.h" #include "radv_shader.h" #include "aco_nir_call_attribs.h" @@ -172,7 +170,7 @@ radv_nir_init_rt_function_params(nir_function *function, mesa_shader_stage stage * Global variables for an RT pipeline */ struct rt_variables { - struct radv_device *device; + const struct radv_compiler_info *compiler_info; const VkPipelineCreateFlags2 flags; /* Stage-dependent parameter indices */ @@ -214,12 +212,12 @@ struct rt_variables { }; static struct rt_variables -create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipelineCreateFlags2 flags, - unsigned max_payload_size, unsigned max_hit_attrib_size) +create_rt_variables(nir_shader *shader, const struct radv_compiler_info *compiler_info, + const VkPipelineCreateFlags2 flags, unsigned max_payload_size, unsigned max_hit_attrib_size) { const bool uses_descriptor_heap = flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; struct rt_variables vars = { - .device = device, + .compiler_info = compiler_info, .flags = flags, }; @@ -349,6 +347,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars) struct rt_variables *vars = _vars; const bool uses_descriptor_heap = vars->flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; + const struct radv_compiler_info *compiler_info = vars->compiler_info; b->cursor = nir_before_instr(&intr->instr); @@ -452,7 +451,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars) break; } case nir_intrinsic_load_ray_instance_custom_index: { - ret = radv_load_custom_instance(vars->device, b, nir_load_param(b, vars->instance_addr_param)); + ret = radv_load_custom_instance(compiler_info, b, nir_load_param(b, vars->instance_addr_param)); break; } case nir_intrinsic_load_primitive_id: { @@ -465,7 +464,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars) break; } case nir_intrinsic_load_instance_id: { - ret = radv_load_instance_id(vars->device, b, nir_load_param(b, vars->instance_addr_param)); + ret = radv_load_instance_id(compiler_info, b, nir_load_param(b, vars->instance_addr_param)); break; } case nir_intrinsic_load_ray_flags: { @@ -483,7 +482,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars) unsigned c = nir_intrinsic_column(intr); nir_def *instance_node_addr = 
nir_load_param(b, vars->instance_addr_param); nir_def *wto_matrix[3]; - radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix); + radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix); nir_def *vals[3]; for (unsigned i = 0; i < 3; ++i) @@ -495,20 +494,20 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars) case nir_intrinsic_load_ray_object_to_world: { unsigned c = nir_intrinsic_column(intr); nir_def *otw_matrix[3]; - radv_load_otw_matrix(vars->device, b, nir_load_param(b, vars->instance_addr_param), otw_matrix); + radv_load_otw_matrix(compiler_info, b, nir_load_param(b, vars->instance_addr_param), otw_matrix); ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c), nir_channel(b, otw_matrix[2], c)); break; } case nir_intrinsic_load_ray_object_origin: { nir_def *wto_matrix[3]; - radv_load_wto_matrix(vars->device, b, nir_load_param(b, vars->instance_addr_param), wto_matrix); + radv_load_wto_matrix(compiler_info, b, nir_load_param(b, vars->instance_addr_param), wto_matrix); ret = nir_build_vec3_mat_mult(b, nir_load_param(b, vars->ray_origin_param), wto_matrix, true); break; } case nir_intrinsic_load_ray_object_direction: { nir_def *wto_matrix[3]; - radv_load_wto_matrix(vars->device, b, nir_load_param(b, vars->instance_addr_param), wto_matrix); + radv_load_wto_matrix(compiler_info, b, nir_load_param(b, vars->instance_addr_param), wto_matrix); ret = nir_build_vec3_mat_mult(b, nir_load_param(b, vars->ray_direction_param), wto_matrix, false); break; } @@ -713,7 +712,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars) } case nir_intrinsic_load_ray_triangle_vertex_positions: { nir_def *primitive_addr = nir_load_param(b, vars->primitive_addr_param); - ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr)); + ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr)); break; } default: @@ -852,7 +851,7 @@ radv_get_rt_shader_entrypoint(nir_shader *shader) void radv_nir_lower_rt_abi_functions(nir_shader *shader, const struct radv_shader_info *info, uint32_t payload_size, - uint32_t hit_attrib_size, struct radv_device *device, + uint32_t hit_attrib_size, const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline) { const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; @@ -866,7 +865,7 @@ radv_nir_lower_rt_abi_functions(nir_shader *shader, const struct radv_shader_inf uses_descriptor_heap); struct rt_variables vars = - create_rt_variables(shader, device, pipeline->base.base.create_flags, payload_size, hit_attrib_size); + create_rt_variables(shader, compiler_info, pipeline->base.base.create_flags, payload_size, hit_attrib_size); nir_builder b = nir_builder_at(nir_before_impl(impl)); unsigned num_hit_attribs = DIV_ROUND_UP(hit_attrib_size, 4); diff --git a/src/amd/vulkan/nir/radv_nir_rt_stage_functions.h b/src/amd/vulkan/nir/radv_nir_rt_stage_functions.h index b1f7d881832..dead5599192 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_stage_functions.h +++ b/src/amd/vulkan/nir/radv_nir_rt_stage_functions.h @@ -17,7 +17,7 @@ void radv_nir_init_rt_function_params(nir_function *function, mesa_shader_stage unsigned hit_attrib_size, bool uses_descriptor_heap); void radv_nir_lower_rt_abi_functions(nir_shader *shader, const struct radv_shader_info *info, uint32_t payload_size, - uint32_t hit_attrib_size, struct radv_device *device, + uint32_t 
hit_attrib_size, const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline); void radv_nir_lower_rt_io_functions(nir_shader *shader); diff --git a/src/amd/vulkan/nir/radv_nir_rt_stage_monolithic.c b/src/amd/vulkan/nir/radv_nir_rt_stage_monolithic.c index 2eefb844b4f..6a3dd404dda 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_stage_monolithic.c +++ b/src/amd/vulkan/nir/radv_nir_rt_stage_monolithic.c @@ -11,12 +11,10 @@ #include "aco_nir_call_attribs.h" #include "nir_builder.h" -#include "radv_device.h" #include "radv_nir_rt_stage_functions.h" -#include "radv_physical_device.h" struct chit_miss_inlining_params { - struct radv_device *device; + const struct radv_compiler_info *compiler_info; struct radv_nir_rt_traversal_params *trav_params; struct radv_nir_rt_traversal_result *trav_result; @@ -27,7 +25,7 @@ struct chit_miss_inlining_params { }; struct chit_miss_inlining_vars { - struct radv_device *device; + const struct radv_compiler_info *compiler_info; nir_variable *shader_record_ptr; nir_variable *origin; @@ -104,6 +102,7 @@ static bool lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void *_vars) { struct chit_miss_inlining_vars *vars = _vars; + const struct radv_compiler_info *compiler_info = vars->compiler_info; b->cursor = nir_after_instr(&intr->instr); @@ -125,13 +124,13 @@ lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void * ret = nir_load_var(b, vars->tmin); break; case nir_intrinsic_load_ray_instance_custom_index: - ret = radv_load_custom_instance(vars->device, b, nir_load_var(b, vars->instance_addr)); + ret = radv_load_custom_instance(compiler_info, b, nir_load_var(b, vars->instance_addr)); break; case nir_intrinsic_load_primitive_id: ret = nir_load_var(b, vars->primitive_id); break; case nir_intrinsic_load_instance_id: - ret = radv_load_instance_id(vars->device, b, nir_load_var(b, vars->instance_addr)); + ret = radv_load_instance_id(compiler_info, b, nir_load_var(b, vars->instance_addr)); break; case nir_intrinsic_load_ray_hit_kind: ret = nir_load_var(b, vars->hit_kind); @@ -151,7 +150,7 @@ lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void * unsigned c = nir_intrinsic_column(intr); nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr); nir_def *wto_matrix[3]; - radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix); + radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix); nir_def *vals[3]; for (unsigned i = 0; i < 3; ++i) @@ -163,26 +162,26 @@ lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void * case nir_intrinsic_load_ray_object_to_world: { unsigned c = nir_intrinsic_column(intr); nir_def *otw_matrix[3]; - radv_load_otw_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), otw_matrix); + radv_load_otw_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), otw_matrix); ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c), nir_channel(b, otw_matrix[2], c)); break; } case nir_intrinsic_load_ray_object_origin: { nir_def *wto_matrix[3]; - radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix); + radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix); ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->origin), wto_matrix, true); break; } case nir_intrinsic_load_ray_object_direction: { nir_def *wto_matrix[3]; - radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), 
wto_matrix); + radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix); ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->direction), wto_matrix, false); break; } case nir_intrinsic_load_ray_triangle_vertex_positions: { nir_def *primitive_addr = nir_load_var(b, vars->primitive_addr); - ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr)); + ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr)); break; } default: @@ -238,7 +237,6 @@ radv_nir_lower_rt_io_monolithic(nir_shader *nir) } struct rt_variables { - struct radv_device *device; const VkPipelineCreateFlags2 flags; uint32_t payload_offset; @@ -252,13 +250,13 @@ radv_build_recursive_case(nir_builder *b, nir_def *idx, struct radv_ray_tracing_ struct radv_rt_case_data *data) { nir_shader *shader = - radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->recursive_shader].nir); + radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->recursive_shader].nir); assert(shader); struct chit_miss_inlining_params *params = data->param_data; struct chit_miss_inlining_vars vars = { - .device = params->device, + .compiler_info = params->compiler_info, }; nir_opt_dead_cf(shader); @@ -283,7 +281,7 @@ radv_build_recursive_case(nir_builder *b, nir_def *idx, struct radv_ray_tracing_ } struct lower_rt_instruction_monolithic_state { - struct radv_device *device; + const struct radv_compiler_info *compiler_info; struct radv_ray_tracing_pipeline *pipeline; const VkRayTracingPipelineCreateInfoKHR *pCreateInfo; @@ -296,8 +294,8 @@ lower_rt_call_monolithic(nir_builder *b, nir_intrinsic_instr *intr, void *data) b->cursor = nir_after_instr(&intr->instr); struct lower_rt_instruction_monolithic_state *state = data; - const struct radv_physical_device *pdev = radv_device_physical(state->device); struct rt_variables *vars = state->vars; + const struct radv_compiler_info *compiler_info = state->compiler_info; switch (intr->intrinsic) { case nir_intrinsic_execute_callable: @@ -333,19 +331,19 @@ lower_rt_call_monolithic(nir_builder *b, nir_intrinsic_instr *intr, void *data) nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, vars->stack_size), 0x1); struct radv_nir_rt_traversal_result result = - radv_build_traversal(state->device, state->pipeline, b, ¶ms, NULL); + radv_build_traversal(compiler_info, state->pipeline, b, ¶ms, NULL); nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1); struct chit_miss_inlining_params inline_params = { - .device = state->device, + .compiler_info = compiler_info, .trav_params = ¶ms, .trav_result = &result, .payload_offset = vars->payload_offset, }; struct radv_rt_case_data case_data = { - .device = state->device, + .compiler_info = compiler_info, .pipeline = state->pipeline, .param_data = &inline_params, }; @@ -383,7 +381,7 @@ lower_rt_call_monolithic(nir_builder *b, nir_intrinsic_instr *intr, void *data) nir_pop_if(b, NULL); b->shader->info.shared_size = - MAX2(b->shader->info.shared_size, pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t)); + MAX2(b->shader->info.shared_size, compiler_info->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t)); nir_instr_remove(&intr->instr); return true; @@ -455,7 +453,7 @@ radv_count_ray_payload_size(nir_builder *b, nir_intrinsic_instr *instr, void *da } void -radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device, +radv_nir_lower_rt_abi_monolithic(nir_shader *shader, const struct 
radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline) { const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; @@ -465,7 +463,6 @@ radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device, nir_builder b = nir_builder_at(nir_before_impl(impl)); struct rt_variables vars = { - .device = device, .flags = pipeline->base.base.create_flags, .stack_size = b.shader->scratch_size, }; @@ -474,7 +471,7 @@ radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device, nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1); struct lower_rt_instruction_monolithic_state state = { - .device = device, + .compiler_info = compiler_info, .pipeline = pipeline, .vars = &vars, }; diff --git a/src/amd/vulkan/nir/radv_nir_rt_stage_monolithic.h b/src/amd/vulkan/nir/radv_nir_rt_stage_monolithic.h index 24d8ed2c97f..8d20a9b61c0 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_stage_monolithic.h +++ b/src/amd/vulkan/nir/radv_nir_rt_stage_monolithic.h @@ -11,7 +11,7 @@ #include "radv_pipeline_rt.h" -void radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device, +void radv_nir_lower_rt_abi_monolithic(nir_shader *shader, const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline); void radv_nir_lower_rt_io_monolithic(nir_shader *shader); diff --git a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c index 1debc5c1649..9a191448093 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c +++ b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.c @@ -13,7 +13,6 @@ #include "aco_nir_call_attribs.h" #include "nir_builder.h" -#include "radv_device.h" #include "radv_meta_nir.h" #include "radv_nir_rt_stage_functions.h" #include "radv_physical_device.h" @@ -64,7 +63,7 @@ struct anyhit_shader_vars { /* Parameters passed through to an inlined any-hit/intersection shader */ struct traversal_inlining_params { - struct radv_device *device; + const struct radv_compiler_info *compiler_info; radv_nir_ahit_isec_preprocess_cb preprocess; void *preprocess_data; @@ -76,7 +75,7 @@ struct traversal_inlining_params { /* Data about ray traversal passed through to AABB/Intersection callbacks */ struct traversal_data { - struct radv_device *device; + const struct radv_compiler_info *compiler_info; struct radv_nir_rt_traversal_params *params; struct traversal_vars trav_vars; @@ -223,6 +222,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par b->cursor = nir_after_instr(&intr->instr); struct traversal_inlining_params *params = _params; + const struct radv_compiler_info *compiler_info = params->compiler_info; nir_def *ret = NULL; switch (intr->intrinsic) { @@ -252,13 +252,13 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par ret = nir_load_var(b, params->anyhit_vars->tmin); break; case nir_intrinsic_load_ray_instance_custom_index: - ret = radv_load_custom_instance(params->device, b, nir_load_var(b, params->candidate->instance_addr)); + ret = radv_load_custom_instance(compiler_info, b, nir_load_var(b, params->candidate->instance_addr)); break; case nir_intrinsic_load_primitive_id: ret = nir_load_var(b, params->candidate->primitive_id); break; case nir_intrinsic_load_instance_id: - ret = radv_load_instance_id(params->device, b, nir_load_var(b, params->candidate->instance_addr)); + ret = radv_load_instance_id(compiler_info, b, nir_load_var(b, 
params->candidate->instance_addr)); break; case nir_intrinsic_load_ray_hit_kind: ret = nir_load_var(b, params->candidate->hit_kind); @@ -278,7 +278,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par unsigned c = nir_intrinsic_column(intr); nir_def *instance_node_addr = nir_load_var(b, params->candidate->instance_addr); nir_def *wto_matrix[3]; - radv_load_wto_matrix(params->device, b, instance_node_addr, wto_matrix); + radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix); nir_def *vals[3]; for (unsigned i = 0; i < 3; ++i) @@ -290,7 +290,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par case nir_intrinsic_load_ray_object_to_world: { unsigned c = nir_intrinsic_column(intr); nir_def *otw_matrix[3]; - radv_load_otw_matrix(params->device, b, nir_load_var(b, params->candidate->instance_addr), otw_matrix); + radv_load_otw_matrix(compiler_info, b, nir_load_var(b, params->candidate->instance_addr), otw_matrix); ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c), nir_channel(b, otw_matrix[2], c)); break; @@ -346,7 +346,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par } case nir_intrinsic_load_ray_triangle_vertex_positions: { nir_def *primitive_addr = nir_load_var(b, params->candidate->primitive_addr); - ret = radv_load_vertex_position(params->device, b, primitive_addr, nir_intrinsic_column(intr)); + ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr)); break; } default: @@ -371,7 +371,7 @@ insert_inlined_shader(nir_builder *b, struct traversal_inlining_params *params, * duplicates to the original variables passed through in 'params'. */ struct traversal_inlining_params src_params = { - .device = params->device, + .compiler_info = params->compiler_info, }; struct traversal_vars src_trav_vars; @@ -647,7 +647,7 @@ radv_build_ahit_case(nir_builder *b, nir_def *sbt_idx, struct radv_ray_tracing_g struct traversal_inlining_params *params = data->param_data; nir_shader *nir_stage = - radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->any_hit_shader].nir); + radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->any_hit_shader].nir); assert(nir_stage); params->preprocess(nir_stage, params->preprocess_data); @@ -673,7 +673,7 @@ radv_build_isec_case(nir_builder *b, nir_def *sbt_idx, struct radv_ray_tracing_g struct traversal_inlining_params *params = data->param_data; nir_shader *nir_stage = - radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->intersection_shader].nir); + radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->intersection_shader].nir); assert(nir_stage); params->preprocess(nir_stage, params->preprocess_data); @@ -681,7 +681,7 @@ radv_build_isec_case(nir_builder *b, nir_def *sbt_idx, struct radv_ray_tracing_g nir_shader *any_hit_stage = NULL; if (group->any_hit_shader != VK_SHADER_UNUSED_KHR) { any_hit_stage = - radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->any_hit_shader].nir); + radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->any_hit_shader].nir); assert(any_hit_stage); params->preprocess(any_hit_stage, params->preprocess_data); @@ -698,7 +698,7 @@ static nir_def * radv_build_token_begin(nir_builder *b, struct traversal_data *data, nir_def *hit, enum radv_packed_token_type token_type, nir_def *token_size, uint32_t 
max_token_size) { - struct radv_rra_trace_data *rra_trace = &data->device->rra_trace; + struct radv_rra_trace_data *rra_trace = data->compiler_info->rra_trace; assert(rra_trace->ray_history_addr); assert(rra_trace->ray_history_buffer_size >= max_token_size); @@ -767,7 +767,7 @@ radv_build_end_trace_token(nir_builder *b, struct traversal_data *data, nir_def dst_addr = nir_iadd_imm(b, dst_addr, 8); nir_def *dispatch_indices = - ac_nir_load_smem(b, 2, nir_imm_int64(b, data->device->rra_trace.ray_history_addr), + ac_nir_load_smem(b, 2, nir_imm_int64(b, data->compiler_info->rra_trace->ray_history_addr), nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), 4, 0); nir_def *dispatch_index = nir_iadd(b, nir_channel(b, dispatch_indices, 0), nir_channel(b, dispatch_indices, 1)); nir_def *dispatch_and_flags = nir_iand_imm(b, data->params->cull_mask_and_flags, 0xFFFF); @@ -883,7 +883,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int nir_store_var(b, ahit_vars.shader_record_ptr, sbt_data.shader_record_ptr, 0x1); struct traversal_inlining_params inlining_params = { - .device = data->device, + .compiler_info = data->compiler_info, .trav_vars = &data->trav_vars, .candidate = &candidate_result, .anyhit_vars = &ahit_vars, @@ -892,7 +892,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int }; struct radv_rt_case_data case_data = { - .device = data->device, + .compiler_info = data->compiler_info, .pipeline = data->pipeline, .param_data = &inlining_params, }; @@ -1040,7 +1040,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio nir_store_var(b, ahit_vars.shader_record_ptr, sbt_data.shader_record_ptr, 0x1); struct traversal_inlining_params inlining_params = { - .device = data->device, + .compiler_info = data->compiler_info, .trav_vars = &data->trav_vars, .candidate = &candidate_result, .anyhit_vars = &ahit_vars, @@ -1049,7 +1049,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio }; struct radv_rt_case_data case_data = { - .device = data->device, + .compiler_info = data->compiler_info, .pipeline = data->pipeline, .param_data = &inlining_params, }; @@ -1148,17 +1148,17 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal } struct radv_nir_rt_traversal_result -radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, nir_builder *b, - struct radv_nir_rt_traversal_params *params, struct radv_ray_tracing_stage_info *info) +radv_build_traversal(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline, + nir_builder *b, struct radv_nir_rt_traversal_params *params, + struct radv_ray_tracing_stage_info *info) { const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT; - const struct radv_physical_device *pdev = radv_device_physical(device); nir_variable *barycentrics = nir_variable_create(b->shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics"); barycentrics->data.driver_location = 0; struct traversal_data data = { - .device = device, + .compiler_info = compiler_info, .params = params, .pipeline = pipeline, }; @@ -1202,25 +1202,27 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin } nir_push_else(b, NULL); { - nir_store_var(b, data.trav_vars.current_node, - nir_imm_int(b, radv_use_bvh_stack_rtn(pdev) ? 
RADV_BVH_STACK_TERMINAL_NODE : RADV_BVH_INVALID_NODE), - 0x1); + nir_store_var( + b, data.trav_vars.current_node, + nir_imm_int(b, radv_use_bvh_stack_rtn(compiler_info) ? RADV_BVH_STACK_TERMINAL_NODE : RADV_BVH_INVALID_NODE), + 0x1); } nir_pop_if(b, NULL); bvh_offset = nir_if_phi(b, bvh_offset, zero); nir_def *root_bvh_base = nir_iadd(b, params->accel_struct, nir_u2u64(b, bvh_offset)); - root_bvh_base = build_addr_to_node(device, b, root_bvh_base, params->cull_mask_and_flags); + root_bvh_base = build_addr_to_node(compiler_info, b, root_bvh_base, params->cull_mask_and_flags); nir_def *stack_idx = nir_load_subgroup_invocation(b); uint32_t stack_stride; - if (radv_use_bvh_stack_rtn(pdev)) { - stack_idx = radv_build_bvh_stack_rtn_addr(b, stack_idx, pdev, pdev->rt_wave_size, 0, MAX_STACK_ENTRY_COUNT); + if (radv_use_bvh_stack_rtn(compiler_info)) { + stack_idx = radv_build_bvh_stack_rtn_addr(b, stack_idx, compiler_info, compiler_info->rt_wave_size, 0, + MAX_STACK_ENTRY_COUNT); stack_stride = 1; } else { stack_idx = nir_imul_imm(b, stack_idx, sizeof(uint32_t)); - stack_stride = pdev->rt_wave_size * sizeof(uint32_t); + stack_stride = compiler_info->rt_wave_size * sizeof(uint32_t); } nir_store_var(b, data.trav_vars.result.hit, nir_imm_false(b), 1); @@ -1245,7 +1247,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin nir_store_var(b, data.trav_vars.top_stack, nir_imm_int(b, -1), 1); nir_variable *iteration_instance_count = NULL; - if (device->rra_trace.ray_history_addr) { + if (compiler_info->rra_trace->ray_history_addr) { data.trav_vars.ahit_isec_count = nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "ahit_isec_count"); iteration_instance_count = @@ -1277,16 +1279,16 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin .triangle_cb = (pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR) ? 
NULL : handle_candidate_triangle, - .use_bvh_stack_rtn = radv_use_bvh_stack_rtn(pdev), + .use_bvh_stack_rtn = radv_use_bvh_stack_rtn(compiler_info), .data = &data, }; - if (pdev->cache_key.bvh8) - radv_build_ray_traversal_gfx12(device, b, &args); + if (compiler_info->cache_key->bvh8) + radv_build_ray_traversal_gfx12(compiler_info, b, &args); else - radv_build_ray_traversal(device, b, &args); + radv_build_ray_traversal(compiler_info, b, &args); - if (device->rra_trace.ray_history_addr) + if (compiler_info->rra_trace->ray_history_addr) radv_build_end_trace_token(b, &data, nir_load_var(b, iteration_instance_count)); nir_progress(true, b->impl, nir_metadata_none); @@ -1304,21 +1306,19 @@ preprocess_traversal_shader_ahit_isec(nir_shader *nir, void *cb) } nir_shader * -radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, +radv_build_traversal_shader(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline, struct radv_ray_tracing_stage_info *info, radv_nir_traversal_preprocess_cb preprocess, uint32_t payload_size, uint32_t hit_attrib_size) { - const struct radv_physical_device *pdev = radv_device_physical(device); - /* Create the traversal shader as an intersection shader to prevent validation failures due to * invalid variable modes.*/ nir_builder b = radv_meta_nir_init_shader(MESA_SHADER_INTERSECTION, "rt_traversal"); - b.shader->options = &pdev->nir_options[MESA_SHADER_INTERSECTION]; - b.shader->info.workgroup_size[0] = pdev->rt_wave_size; - b.shader->info.api_subgroup_size = pdev->rt_wave_size; - b.shader->info.max_subgroup_size = pdev->rt_wave_size; - b.shader->info.min_subgroup_size = pdev->rt_wave_size; - b.shader->info.shared_size = pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t); + b.shader->options = &compiler_info->nir_options[MESA_SHADER_INTERSECTION]; + b.shader->info.workgroup_size[0] = compiler_info->rt_wave_size; + b.shader->info.api_subgroup_size = compiler_info->rt_wave_size; + b.shader->info.max_subgroup_size = compiler_info->rt_wave_size; + b.shader->info.min_subgroup_size = compiler_info->rt_wave_size; + b.shader->info.shared_size = compiler_info->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t); struct radv_nir_rt_traversal_params params = {0}; @@ -1354,7 +1354,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ params.hit_attrib_size = hit_attrib_size; params.ignore_cull_mask = false; - struct radv_nir_rt_traversal_result result = radv_build_traversal(device, pipeline, &b, ¶ms, info); + struct radv_nir_rt_traversal_result result = radv_build_traversal(compiler_info, pipeline, &b, ¶ms, info); b.cursor = nir_before_impl(nir_shader_get_entrypoint(b.shader)); @@ -1366,7 +1366,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_ } b.cursor = nir_after_impl(nir_shader_get_entrypoint(b.shader)); - radv_nir_lower_rt_storage(b.shader, hit_attrib_derefs, NULL, NULL, pdev->rt_wave_size); + radv_nir_lower_rt_storage(b.shader, hit_attrib_derefs, NULL, NULL, compiler_info->rt_wave_size); nir_push_if(&b, nir_load_var(&b, result.hit)); { diff --git a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.h b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.h index 75bb3efccc6..5c48496163c 100644 --- a/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.h +++ b/src/amd/vulkan/nir/radv_nir_rt_traversal_shader.h @@ -13,7 +13,8 @@ typedef void (*radv_nir_traversal_preprocess_cb)(nir_shader *nir); void 
radv_nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit); -nir_shader *radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, +nir_shader *radv_build_traversal_shader(const struct radv_compiler_info *compiler_info, + struct radv_ray_tracing_pipeline *pipeline, struct radv_ray_tracing_stage_info *info, radv_nir_traversal_preprocess_cb preprocess, uint32_t payload_size, uint32_t hit_attrib_size); diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h index 6c486284b2c..90310c23605 100644 --- a/src/amd/vulkan/radv_aco_shader_info.h +++ b/src/amd/vulkan/radv_aco_shader_info.h @@ -41,13 +41,12 @@ radv_calculate_lds_size(const struct radv_shader_info *radv, const enum amd_gfx_ static inline void radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv_shader_info *radv, - const struct radv_shader_args *radv_args, const struct radv_device_cache_key *radv_key, - const enum amd_gfx_level gfx_level) + const struct radv_shader_args *radv_args, const struct radv_compiler_info *compiler_info) { bool ngg_wave_id_en = radv->ngg_wave_id_en; /* Separately compiled shader, where the next stage might use NGG streamout. */ ngg_wave_id_en |= radv->is_ngg && radv->merged_shader_compiled_separately && - radv->next_stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX11; + radv->next_stage == MESA_SHADER_GEOMETRY && compiler_info->ac->gfx_level >= GFX11; ASSIGN_FIELD(wave_size); ASSIGN_FIELD(workgroup_size); @@ -60,16 +59,17 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv ASSIGN_FIELD(descriptor_heap); aco_info->vs.any_tcs_inputs_via_lds = radv->vs.tcs_inputs_via_lds != 0; /* S2 must not be modified for correct hang recovery when NGG_WAVE_ID_EN=1. */ - aco_info->vs.preserve_s2 = ngg_wave_id_en && gfx_level < GFX12; + aco_info->vs.preserve_s2 = ngg_wave_id_en && compiler_info->ac->gfx_level < GFX12; aco_info->ps.spi_ps_input_ena = radv->ps.spi_ps_input_ena; aco_info->ps.spi_ps_input_addr = radv->ps.spi_ps_input_addr; aco_info->ps.has_prolog = false; - aco_info->image_2d_view_of_3d = radv_key->image_2d_view_of_3d; + aco_info->image_2d_view_of_3d = compiler_info->image_2d_view_of_3d; aco_info->epilog_pc = radv_args->epilog_pc; - aco_info->hw_stage = radv_select_hw_stage(radv, gfx_level); + aco_info->hw_stage = radv_select_hw_stage(radv, compiler_info->ac->gfx_level); aco_info->next_stage_pc = radv_args->next_stage_pc; - aco_info->schedule_ngg_pos_exports = gfx_level < GFX11 && radv->has_ngg_culling && radv->has_ngg_early_prim_export; - aco_info->lds_size = radv_calculate_lds_size(radv, gfx_level); + aco_info->schedule_ngg_pos_exports = + compiler_info->ac->gfx_level < GFX11 && radv->has_ngg_culling && radv->has_ngg_early_prim_export; + aco_info->lds_size = radv_calculate_lds_size(radv, compiler_info->ac->gfx_level); } static inline void diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c index 2a4a54fcd6b..dbbf9c81d9f 100644 --- a/src/amd/vulkan/radv_cmd_buffer.c +++ b/src/amd/vulkan/radv_cmd_buffer.c @@ -6330,7 +6330,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer) } } - struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(device, &state); + struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(&device->compiler_info, &state); /* Adjust the remapping for alpha-to-coverage without any color attachment and dual-source * blending to make sure colors written aren't cleared. 
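/* [editor's note, not part of the patch] The accesses in this series imply a
 * compiler-side context struct roughly shaped as sketched below. All names
 * here are reconstructed from the call sites in these hunks; the member and
 * type names (in particular "ac_compiler_info") are guesses, and the
 * authoritative definition lives in a header that is not part of this diff:
 *
 *    struct radv_compiler_info {
 *       const struct ac_compiler_info *ac;     // gfx_level + HW quirk bits
 *       struct { uint32_t address32_hi; } hw;  // high half for 32-bit VAs
 *       const struct radv_device_cache_key *cache_key;
 *       unsigned rt_wave_size;
 *       nir_shader_compiler_options nir_options[MESA_SHADER_STAGES];
 *       struct radv_rra_trace_data *rra_trace; // ray-history tracing state
 *       struct vk_pipeline_cache *mem_cache;   // in-memory fallback cache
 *       bool cache_disabled;
 *       bool image_2d_view_of_3d;
 *       bool load_grid_size_from_user_sgpr;
 *       uint32_t override_graphics_shader_version; // plus compute/RT variants
 *       struct { bool capture_shaders, capture_shader_stats, use_llvm; } debug;
 *    };
 *
 * Device-level call sites pass &device->compiler_info, as in the hunks around
 * this note; code under the compiler proper only ever sees the const pointer.
 */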
@@ -12829,7 +12829,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; uint32_t tess_num_patches, tess_lds_size; - radv_get_tess_wg_info(pdev, &tcs->info.tcs.io_info, tcs->info.tcs.tcs_vertices_out, + radv_get_tess_wg_info(&device->compiler_info, &tcs->info.tcs.io_info, tcs->info.tcs.tcs_vertices_out, d->vk.ts.patch_control_points, /* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */ vs->info.vs.num_linked_outputs, &tess_num_patches, &tess_lds_size); @@ -13104,10 +13104,10 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer) if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg) { gfx10_ngg_set_esgs_ring_itemsize(&es->info, &gs->info, &gs->info.ngg_info); - gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info); + gfx10_get_ngg_info(&device->compiler_info, &es->info, &gs->info, &gs->info.ngg_info); radv_precompute_registers_hw_ngg(device, gs); } else { - radv_get_legacy_gs_info(device, &es->info, &gs->info); + radv_get_legacy_gs_info(&device->compiler_info, &es->info, &gs->info); radv_precompute_registers_hw_gs(device, &es->info, gs); cmd_buffer->esgs_ring_size_needed = MAX2(cmd_buffer->esgs_ring_size_needed, gs->regs.gs.esgs_ring_size); diff --git a/src/amd/vulkan/radv_physical_device.c b/src/amd/vulkan/radv_physical_device.c index e2930b83fb9..bd1b8b8967c 100644 --- a/src/amd/vulkan/radv_physical_device.c +++ b/src/amd/vulkan/radv_physical_device.c @@ -289,7 +289,7 @@ radv_physical_device_init_cache_key(struct radv_physical_device *pdev) key->clear_lds = instance->drirc.misc.clear_lds; key->cs_wave32 = pdev->cs_wave_size == 32; - key->disable_aniso_single_level = instance->drirc.debug.disable_aniso_single_level && pdev->info.gfx_level < GFX8; + key->disable_aniso_single_level = instance->drirc.debug.disable_aniso_single_level; key->disable_shrink_image_store = instance->drirc.debug.disable_shrink_image_store; key->disable_sinking_load_input_fs = instance->drirc.debug.disable_sinking_load_input_fs; key->disable_trunc_coord = instance->drirc.debug.disable_trunc_coord; diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c index 08ee61928c8..34181a29c40 100644 --- a/src/amd/vulkan/radv_pipeline.c +++ b/src/amd/vulkan/radv_pipeline.c @@ -36,25 +36,16 @@ #include "vk_format.h" bool -radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2 flags) +radv_pipeline_capture_shaders(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); - return (flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) || - (instance->debug_flags & RADV_DEBUG_DUMP_SHADERS) || device->keep_shader_info; + return (flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) || + compiler_info->debug.capture_shaders; } bool -radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2 flags) +radv_pipeline_capture_shader_stats(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); - - /* Capture shader statistics when RGP is enabled to correlate shader hashes with Fossilize.
*/ - return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) || - (instance->debug_flags & (RADV_DEBUG_DUMP_SHADER_STATS | RADV_DEBUG_PSO_HISTORY)) || - device->keep_shader_info || (instance->vk.trace_mode & RADV_TRACE_MODE_RGP); + return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) || compiler_info->debug.capture_shader_stats; } bool @@ -127,14 +118,13 @@ struct radv_shader_stage_key radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineShaderStageCreateInfo *stage, VkPipelineCreateFlags2 flags, const void *pNext) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); + const struct radv_compiler_info *compiler_info = &device->compiler_info; mesa_shader_stage s = vk_to_mesa_shader_stage(stage->stage); struct vk_pipeline_robustness_state rs; struct radv_shader_stage_key key = {0}; - key.keep_statistic_info = radv_pipeline_capture_shader_stats(device, flags); - key.keep_executable_info = radv_pipeline_capture_shaders(device, flags); + key.keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, flags); + key.keep_executable_info = radv_pipeline_capture_shaders(compiler_info, flags); if (flags & VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT) key.optimisations_disabled = 1; @@ -149,12 +139,12 @@ radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineS key.descriptor_heap = 1; if (stage->stage & RADV_GRAPHICS_STAGE_BITS) { - key.version = instance->drirc.misc.override_graphics_shader_version; + key.version = compiler_info->override_graphics_shader_version; } else if (stage->stage & RADV_RT_STAGE_BITS) { - key.version = instance->drirc.misc.override_ray_tracing_shader_version; + key.version = compiler_info->override_ray_tracing_shader_version; } else { assert(stage->stage == VK_SHADER_STAGE_COMPUTE_BIT); - key.version = instance->drirc.misc.override_compute_shader_version; + key.version = compiler_info->override_compute_shader_version; } vk_pipeline_robustness_state_fill(&device->vk.robustness_state, &rs, pNext, stage->pNext); @@ -262,12 +252,11 @@ non_uniform_access_callback(const nir_src *src, void *_) } void -radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state, +radv_postprocess_nir(const struct radv_compiler_info *compiler_info, const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stage) { - const struct radv_physical_device *pdev = radv_device_physical(device); - enum amd_gfx_level gfx_level = pdev->info.gfx_level; - const bool use_llvm = pdev->use_llvm; + enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level; + const bool use_llvm = compiler_info->debug.use_llvm; bool progress; /* Wave and workgroup size should already be filled. 
*/ @@ -318,7 +307,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(progress, stage->nir, nir_opt_load_store_vectorize, &vectorize_opts); if (progress) { NIR_PASS(_, stage->nir, nir_opt_copy_prop); - NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !pdev->cache_key.disable_shrink_image_store); + NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !compiler_info->cache_key->disable_shrink_image_store); constant_fold_for_push_const = true; } @@ -357,7 +346,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat &(ac_nir_lower_image_tex_options){ .gfx_level = gfx_level, .lower_array_layer_round_even = - !pdev->info.compiler_info.conformant_trunc_coord && !pdev->cache_key.disable_trunc_coord, + !compiler_info->ac->conformant_trunc_coord && !compiler_info->cache_key->disable_trunc_coord, .fix_derivs_in_divergent_cf = stage->stage == MESA_SHADER_FRAGMENT && !use_llvm, .max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs }); @@ -378,7 +367,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat /* This has to be done after nir_opt_algebraic for best descriptor vectorization, but also before * NGG culling. */ - NIR_PASS(_, stage->nir, radv_nir_lower_descriptors, device, stage); + NIR_PASS(_, stage->nir, radv_nir_lower_descriptors, compiler_info, stage); NIR_PASS(_, stage->nir, nir_lower_alu_width, ac_nir_opt_vectorize_cb, &gfx_level); @@ -395,7 +384,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts); NIR_PASS(_, stage->nir, nir_opt_move, sink_opts); } else { - if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs) + if (stage->stage != MESA_SHADER_FRAGMENT || !compiler_info->cache_key->disable_sinking_load_input_fs) sink_opts |= nir_move_load_input | nir_move_load_frag_coord; NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts); @@ -407,13 +396,13 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat * load_input can be reordered, but buffer loads can't. */ if (stage->stage == MESA_SHADER_VERTEX) { - NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info); + NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, compiler_info, stage, gfx_state); } /* Lower I/O intrinsics to memory instructions. 
*/ - bool io_to_mem = radv_nir_lower_io_to_mem(device, stage); + bool io_to_mem = radv_nir_lower_io_to_mem(compiler_info, stage); if (lowered_ngg) { - radv_lower_ngg(device, stage, gfx_state); + radv_lower_ngg(compiler_info, stage, gfx_state); } else if (is_last_vgt_stage) { if (stage->stage != MESA_SHADER_GEOMETRY) { NIR_PASS(_, stage->nir, ac_nir_lower_legacy_vs, gfx_level, @@ -477,7 +466,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(_, stage->nir, ac_nir_lower_ps_late, &late_options); } - if (radv_shader_should_clear_lds(device, stage->nir)) { + if (radv_shader_should_clear_lds(compiler_info, stage->nir)) { const unsigned chunk_size = 16; /* max single store size */ const unsigned shared_size = align(stage->nir->info.shared_size, chunk_size); NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size); @@ -504,18 +493,18 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat .allow_fp16 = gfx_level >= GFX9, }); - NIR_PASS(_, stage->nir, ac_nir_lower_intrinsics_to_args, &stage->args.ac, - &(ac_nir_lower_intrinsics_to_args_options){ - .gfx_level = gfx_level, - .has_ls_vgpr_init_bug = - pdev->info.compiler_info.has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog, - .hw_stage = radv_select_hw_stage(&stage->info, gfx_level), - .wave_size = stage->info.wave_size, - .workgroup_size = stage->info.workgroup_size, - .use_llvm = use_llvm, - .load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr, - }); - NIR_PASS(_, stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi); + NIR_PASS( + _, stage->nir, ac_nir_lower_intrinsics_to_args, &stage->args.ac, + &(ac_nir_lower_intrinsics_to_args_options){ + .gfx_level = gfx_level, + .has_ls_vgpr_init_bug = compiler_info->ac->has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog, + .hw_stage = radv_select_hw_stage(&stage->info, gfx_level), + .wave_size = stage->info.wave_size, + .workgroup_size = stage->info.workgroup_size, + .use_llvm = use_llvm, + .load_grid_size_from_user_sgpr = compiler_info->load_grid_size_from_user_sgpr, + }); + NIR_PASS(_, stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, compiler_info->hw.address32_hi); if (!stage->key.optimisations_disabled) { NIR_PASS(_, stage->nir, nir_opt_dce); @@ -545,7 +534,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat NIR_PASS(_, stage->nir, ac_nir_lower_global_access); NIR_PASS(_, stage->nir, nir_lower_int64); - if (pdev->cache_key.mitigate_smem_oob) + if (compiler_info->cache_key->mitigate_smem_oob) NIR_PASS(_, stage->nir, ac_nir_fixup_mem_access_gfx6, &stage->args.ac, 4096, true, true); bool opt_intrinsics = false; @@ -639,13 +628,11 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat } bool -radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader) +radv_shader_should_clear_lds(const struct radv_compiler_info *compiler_info, const nir_shader *shader) { - const struct radv_physical_device *pdev = radv_device_physical(device); - return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH || shader->info.stage == MESA_SHADER_TASK) && - shader->info.shared_size > 0 && pdev->cache_key.clear_lds; + shader->info.shared_size > 0 && compiler_info->cache_key->clear_lds; } static uint32_t diff --git a/src/amd/vulkan/radv_pipeline.h b/src/amd/vulkan/radv_pipeline.h index e41b0f57ade..ae5a7b01561 100644 --- 
a/src/amd/vulkan/radv_pipeline.h +++ b/src/amd/vulkan/radv_pipeline.h @@ -26,6 +26,7 @@ struct radv_graphics_state_key; struct radv_shader_layout; struct nir_shader; typedef struct nir_shader nir_shader; +struct radv_compiler_info; enum radv_pipeline_type { RADV_PIPELINE_GRAPHICS, @@ -72,9 +73,9 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline, VK_OBJECT_TYPE_P return (struct radv_##pipe_type##_pipeline *)pipeline; \ } -bool radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2 flags); +bool radv_pipeline_capture_shaders(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags); -bool radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2 flags); +bool radv_pipeline_capture_shader_stats(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags); bool radv_pipeline_skip_shaders_cache(const struct radv_device *device, const struct radv_pipeline *pipeline); @@ -96,10 +97,10 @@ void radv_shader_layout_init(const struct radv_pipeline_layout *pipeline_layout, void radv_pipeline_stage_finish(struct radv_shader_stage *stage); -void radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state, - struct radv_shader_stage *stage); +void radv_postprocess_nir(const struct radv_compiler_info *compiler_info, + const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stage); -bool radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader); +bool radv_shader_should_clear_lds(const struct radv_compiler_info *compiler_info, const nir_shader *shader); VkPipelineShaderStageCreateInfo *radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount, const VkPipelineShaderStageCreateInfo *pStages, diff --git a/src/amd/vulkan/radv_pipeline_cache.c b/src/amd/vulkan/radv_pipeline_cache.c index 083bcc5fe26..ebf67a119b0 100644 --- a/src/amd/vulkan/radv_pipeline_cache.c +++ b/src/amd/vulkan/radv_pipeline_cache.c @@ -130,30 +130,16 @@ radv_shader_cache_serialize(struct vk_pipeline_cache_object *object, struct blob } static bool -radv_is_cache_disabled(const struct radv_device *device, const struct vk_pipeline_cache *cache) +radv_is_cache_disabled(const struct radv_compiler_info *compiler_info, const struct vk_pipeline_cache *cache) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); - - /* The buffer address used for debug printf is hardcoded. */ - if (device->debug_nir.printf.buffer_addr) - return true; - - /* The buffer address used for validating VAs is hardcoded. */ - if (device->debug_nir.valid_va.buffer_addr) - return true; - - /* Pipeline caches can be disabled with RADV_DEBUG=nocache, with MESA_GLSL_CACHE_DISABLE=1 and - * when ACO_DEBUG is used. MESA_GLSL_CACHE_DISABLE is done elsewhere. - */ - if ((instance->debug_flags & RADV_DEBUG_NO_CACHE) || (pdev->use_llvm ? 0 : aco_get_codegen_flags())) + if (compiler_info->cache_disabled) return true; if (!cache) { /* When the application doesn't provide a pipeline cache and the in-memory cache is also * disabled. 
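 *
 * [editor's aside] compiler_info->cache_disabled is assumed to be computed
 * once, at device init, from the checks this hunk deletes, roughly:
 *
 *    info->cache_disabled = device->debug_nir.printf.buffer_addr ||
 *                           device->debug_nir.valid_va.buffer_addr ||
 *                           (instance->debug_flags & RADV_DEBUG_NO_CACHE) ||
 *                           (!pdev->use_llvm && aco_get_codegen_flags());
 *
 * i.e. hardcoded debug buffer addresses, RADV_DEBUG=nocache, and ACO_DEBUG
 * all force uncached compilation, exactly as before.]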
*/ - cache = device->mem_cache; + cache = compiler_info->mem_cache; if (!cache) return true; } @@ -165,7 +151,7 @@ struct radv_shader * radv_shader_create(struct radv_device *device, struct vk_pipeline_cache *cache, const struct radv_shader_binary *binary, bool skip_cache, struct radv_shader_debug_info *dbg) { - if (radv_is_cache_disabled(device, cache) || skip_cache || (dbg && dbg->dump_shader)) { + if (radv_is_cache_disabled(&device->compiler_info, cache) || skip_cache || (dbg && dbg->dump_shader)) { struct radv_shader *shader; radv_shader_create_uncached(device, binary, false, NULL, dbg, &shader); return shader; @@ -339,7 +325,7 @@ radv_pipeline_cache_object_search(struct radv_device *device, struct vk_pipeline { *found_in_application_cache = false; - if (radv_is_cache_disabled(device, cache)) + if (radv_is_cache_disabled(&device->compiler_info, cache)) return NULL; bool *found = found_in_application_cache; @@ -404,7 +390,7 @@ radv_compute_pipeline_cache_search(struct radv_device *device, struct vk_pipelin void radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_pipeline *pipeline) { - if (radv_is_cache_disabled(device, cache)) + if (radv_is_cache_disabled(&device->compiler_info, cache)) return; if (!cache) @@ -490,7 +476,7 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip pipeline->stages[i].shader = radv_shader_ref(pipeline_obj->shaders[idx++]); if (pipeline->stages[i].needs_nir) { - pipeline->stages[i].nir = radv_pipeline_cache_lookup_nir_handle(device, cache, pipeline->stages[i].blake3); + pipeline->stages[i].nir = radv_pipeline_cache_lookup_nir_handle(&device->compiler_info, cache, pipeline->stages[i].blake3); complete &= pipeline->stages[i].nir != NULL; } } @@ -510,7 +496,7 @@ radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pip struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages, unsigned num_groups) { - if (radv_is_cache_disabled(device, cache)) + if (radv_is_cache_disabled(&device->compiler_info, cache)) return; if (!cache) @@ -574,49 +560,48 @@ radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pip } nir_shader * -radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, mesa_shader_stage stage, - const blake3_hash key) +radv_pipeline_cache_lookup_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache, + mesa_shader_stage stage, const blake3_hash key) { - const struct radv_physical_device *pdev = radv_device_physical(device); - - if (radv_is_cache_disabled(device, cache)) + if (radv_is_cache_disabled(compiler_info, cache)) return NULL; if (!cache) - cache = device->mem_cache; + cache = compiler_info->mem_cache; - return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &pdev->nir_options[stage], NULL, NULL); + return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &compiler_info->nir_options[stage], NULL, NULL); } void -radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key, - const nir_shader *nir) +radv_pipeline_cache_insert_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache, + const blake3_hash key, const nir_shader *nir) { - if (radv_is_cache_disabled(device, cache)) + if (radv_is_cache_disabled(compiler_info, cache)) return; if (!cache) - cache = device->mem_cache; + cache = compiler_info->mem_cache; vk_pipeline_cache_add_nir(cache, key, 
sizeof(blake3_hash), nir); } struct vk_pipeline_cache_object * -radv_pipeline_cache_lookup_nir_handle(struct radv_device *device, struct vk_pipeline_cache *cache, const uint8_t *blake3) +radv_pipeline_cache_lookup_nir_handle(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache, + const uint8_t *blake3) { - if (radv_is_cache_disabled(device, cache)) + if (radv_is_cache_disabled(compiler_info, cache)) return NULL; if (!cache) - cache = device->mem_cache; + cache = compiler_info->mem_cache; return vk_pipeline_cache_lookup_object(cache, blake3, BLAKE3_KEY_LEN, &vk_raw_data_cache_object_ops, NULL); } struct nir_shader * -radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object) +radv_pipeline_cache_handle_to_nir(const struct radv_compiler_info *compiler_info, + struct vk_pipeline_cache_object *object) { - const struct radv_physical_device *pdev = radv_device_physical(device); struct blob_reader blob; struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base); blob_reader_init(&blob, nir_object->data, nir_object->data_size); @@ -626,7 +611,7 @@ radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline ralloc_free(nir); return NULL; } - nir->options = &pdev->nir_options[nir->info.stage]; + nir->options = &compiler_info->nir_options[nir->info.stage]; return nir; } @@ -652,7 +637,7 @@ radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline blob_finish_get_buffer(&blob, &data, &size); struct vk_pipeline_cache_object *object; - if (cached && !radv_is_cache_disabled(device, cache)) { + if (cached && !radv_is_cache_disabled(&device->compiler_info, cache)) { object = vk_pipeline_cache_create_and_insert_object(cache, blake3, BLAKE3_KEY_LEN, data, size, &vk_raw_data_cache_object_ops); } else { @@ -675,7 +660,7 @@ radv_pipeline_cache_get_binaries(struct radv_device *device, const VkAllocationC *found_in_internal_cache = false; - if (radv_is_cache_disabled(device, cache)) + if (radv_is_cache_disabled(&device->compiler_info, cache)) return VK_SUCCESS; struct vk_pipeline_cache_object *object = @@ -711,7 +696,7 @@ radv_pipeline_cache_get_binaries(struct radv_device *device, const VkAllocationC shader = pipeline_obj->shaders[idx++]; if (data->is_library) - nir = radv_pipeline_cache_lookup_nir_handle(device, cache, data->stages[i].blake3); + nir = radv_pipeline_cache_lookup_nir_handle(&device->compiler_info, cache, data->stages[i].blake3); result = radv_create_pipeline_binary_from_rt_shader(device, pAllocator, shader, false, data->stages[i].blake3, &stage_data->info, stage_data->stack_size, nir, diff --git a/src/amd/vulkan/radv_pipeline_cache.h b/src/amd/vulkan/radv_pipeline_cache.h index 2bebd157fee..5680d92341c 100644 --- a/src/amd/vulkan/radv_pipeline_cache.h +++ b/src/amd/vulkan/radv_pipeline_cache.h @@ -27,6 +27,7 @@ struct radv_shader_binary; struct radv_shader_debug_info; struct radv_shader_stage; struct radv_spirv_to_nir_options; +struct radv_compiler_info; struct util_dynarray; struct nir_shader; typedef struct nir_shader nir_shader; @@ -55,17 +56,18 @@ void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct v struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages, unsigned num_groups); -nir_shader *radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, - mesa_shader_stage stage, const blake3_hash key); +nir_shader *radv_pipeline_cache_lookup_nir(const struct 
radv_compiler_info *compiler_info, + struct vk_pipeline_cache *cache, mesa_shader_stage stage, + const blake3_hash key); -void radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key, - const nir_shader *nir); +void radv_pipeline_cache_insert_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache, + const blake3_hash key, const nir_shader *nir); -struct vk_pipeline_cache_object *radv_pipeline_cache_lookup_nir_handle(struct radv_device *device, +struct vk_pipeline_cache_object *radv_pipeline_cache_lookup_nir_handle(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache, const unsigned char *blake3); -struct nir_shader *radv_pipeline_cache_handle_to_nir(struct radv_device *device, +struct nir_shader *radv_pipeline_cache_handle_to_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache_object *object); struct vk_pipeline_cache_object *radv_pipeline_cache_nir_to_handle(struct radv_device *device, diff --git a/src/amd/vulkan/radv_pipeline_compute.c b/src/amd/vulkan/radv_pipeline_compute.c index 6159c99d2e5..93223d811bd 100644 --- a/src/amd/vulkan/radv_pipeline_compute.c +++ b/src/amd/vulkan/radv_pipeline_compute.c @@ -98,35 +98,35 @@ radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct } struct radv_shader_binary * -radv_compile_cs(struct radv_device *device, struct radv_shader_stage *cs_stage, bool keep_executable_info, - bool keep_statistic_info, bool is_internal, struct radv_shader_debug_info *dbg) +radv_compile_cs(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *cs_stage, + bool keep_executable_info, bool keep_statistic_info, bool is_internal, + struct radv_shader_debug_info *dbg) { - struct radv_physical_device *pdev = radv_device_physical(device); - struct radv_instance *instance = radv_physical_device_instance(pdev); /* Compile SPIR-V shader to NIR. */ - cs_stage->nir = radv_shader_spirv_to_nir(device, cs_stage, NULL, is_internal); + cs_stage->nir = radv_shader_spirv_to_nir(compiler_info, cs_stage, NULL, is_internal); radv_optimize_nir(cs_stage->nir, cs_stage->key.optimisations_disabled); /* Run the shader info pass. */ radv_nir_shader_info_init(cs_stage->stage, MESA_SHADER_NONE, &cs_stage->info); - radv_nir_shader_info_pass(device, cs_stage->nir, &cs_stage->layout, &cs_stage->key, NULL, RADV_PIPELINE_COMPUTE, - false, &cs_stage->info); + radv_nir_shader_info_pass(compiler_info, cs_stage->nir, &cs_stage->layout, &cs_stage->key, NULL, + RADV_PIPELINE_COMPUTE, false, &cs_stage->info); - radv_declare_shader_args(device, NULL, &cs_stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &cs_stage->args, dbg); + radv_declare_shader_args(compiler_info, NULL, &cs_stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, + &cs_stage->args, dbg); cs_stage->info.user_sgprs_locs = cs_stage->args.user_sgprs_locs; cs_stage->info.inline_push_constant_mask = cs_stage->args.ac.inline_push_const_mask; /* Postprocess NIR. 
*/ - radv_postprocess_nir(device, NULL, cs_stage); + radv_postprocess_nir(compiler_info, NULL, cs_stage); - dbg->dump_shader = radv_can_dump_shader(device, cs_stage->nir); - bool dump_nir = dbg->dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR); + dbg->dump_shader = radv_can_dump_shader(compiler_info, cs_stage->nir); + bool dump_nir = dbg->dump_shader && compiler_info->debug.dump_nir; if (dbg->dump_shader) { - simple_mtx_lock(&instance->shader_dump_mtx); + simple_mtx_lock(compiler_info->debug.shader_dump_mtx); if (dump_nir) { nir_print_shader(cs_stage->nir, stderr); @@ -134,23 +134,23 @@ radv_compile_cs(struct radv_device *device, struct radv_shader_stage *cs_stage, } /* Compile NIR shader to AMD assembly. */ - struct radv_shader_binary *cs_binary = - radv_shader_nir_to_asm(device, cs_stage, &cs_stage->nir, 1, NULL, keep_executable_info, keep_statistic_info); + struct radv_shader_binary *cs_binary = radv_shader_nir_to_asm(compiler_info, cs_stage, &cs_stage->nir, 1, NULL, + keep_executable_info, keep_statistic_info); /* Dump NIR after nir_to_asm, because ACO modifies it. */ char *nir_string = NULL; if (keep_executable_info || dbg->dump_shader) - nir_string = radv_dump_nir_shaders(instance, &cs_stage->nir, 1); + nir_string = radv_dump_nir_shaders(compiler_info, &cs_stage->nir, 1); - radv_parse_binary_debug_info(device, cs_binary, dbg); + radv_parse_binary_debug_info(compiler_info, cs_binary, dbg); dbg->nir_string = nir_string; dbg->stages = 1 << MESA_SHADER_COMPUTE; - radv_shader_dump_asm(device, dbg, &cs_stage->info); + radv_shader_dump_asm(compiler_info, dbg, &cs_stage->info); if (dbg->dump_shader) - simple_mtx_unlock(&instance->shader_dump_mtx); + simple_mtx_unlock(compiler_info->debug.shader_dump_mtx); if (keep_executable_info && cs_stage->spirv.size) { dbg->spirv = malloc(cs_stage->spirv.size); @@ -185,8 +185,9 @@ radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, st struct vk_pipeline_cache *cache, const VkPipelineShaderStageCreateInfo *pStage, const VkPipelineCreationFeedbackCreateInfo *creation_feedback) { - bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags); - bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags); + const struct radv_compiler_info *compiler_info = &device->compiler_info; + bool keep_executable_info = radv_pipeline_capture_shaders(compiler_info, pipeline->base.create_flags); + bool keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, pipeline->base.create_flags); const bool skip_shaders_cache = radv_pipeline_skip_shaders_cache(device, &pipeline->base); struct radv_shader_stage cs_stage = {0}; VkPipelineCreationFeedback pipeline_feedback = { @@ -220,8 +221,8 @@ radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, st radv_pipeline_stage_init(pipeline->base.create_flags, pStage, pipeline_layout, &stage_key, &cs_stage); struct radv_shader_debug_info cs_dbg = {0}; - struct radv_shader_binary *cs_binary = radv_compile_cs(device, &cs_stage, keep_executable_info, keep_statistic_info, - pipeline->base.is_internal, &cs_dbg); + struct radv_shader_binary *cs_binary = radv_compile_cs(compiler_info, &cs_stage, keep_executable_info, + keep_statistic_info, pipeline->base.is_internal, &cs_dbg); pipeline->base.shaders[MESA_SHADER_COMPUTE] = radv_shader_create(device, cache, cs_binary, skip_shaders_cache, &cs_dbg); @@ -232,7 +233,7 @@ radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, st } 
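/* [Editor's sketch, not part of the patch] This excerpt only shows consumers of
 * radv_compiler_info, never its definition, so the following is a minimal
 * reconstruction from the member accesses in the hunks above and below. Field
 * grouping and exact types are assumptions (in particular the pointed-to types
 * of ->ac and ->cache_key are guessed); the authoritative definition lives
 * elsewhere in the series. */
struct radv_compiler_info {
   const struct ac_compiler_info *ac;      /* assumed type: gfx_level plus HW bug flags such as
                                            * has_ls_vgpr_init_bug, has_cb_lt16bit_int_clamp_bug */
   struct {
      uint32_t address32_hi;
      bool rbplus_allowed;
      bool mesh_fast_launch_2;
   } hw;
   const struct radv_cache_key *cache_key; /* assumed type: mitigate_smem_oob, clear_lds,
                                            * use_ngg, no_ngg_gs, disable_trunc_coord, ... */
   struct {
      bool dump_nir, dump_meta_shaders, dump_shader_stats;
      VkShaderStageFlags dump_shaders;
      bool use_llvm;
      simple_mtx_t *shader_dump_mtx;
   } debug;
   struct vk_pipeline_cache *mem_cache;    /* fallback when callers pass cache == NULL */
   const nir_shader_compiler_options *nir_options; /* array indexed by mesa_shader_stage */
   bool cache_disabled;   /* precomputed: folds RADV_DEBUG=nocache, ACO_DEBUG and the
                           * hardcoded printf/valid-VA buffer checks removed above */
   bool enable_nir_cache; /* was instance->perftest_flags & RADV_PERFTEST_NIR_CACHE */
   bool load_grid_size_from_user_sgpr;
   bool use_ngg;
   bool force_vrs_enabled;
   bool smooth_lines;     /* snapshot of device->vk.enabled_features.smoothLines */
   int force_aniso;
   uint32_t subgroup_size, min_subgroup_size, max_subgroup_size;
};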
free(cs_binary); - if (radv_can_dump_shader_stats(device, cs_stage.nir)) { + if (radv_can_dump_shader_stats(&device->compiler_info, cs_stage.nir)) { radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[MESA_SHADER_COMPUTE], stderr); } radv_pipeline_stage_finish(&cs_stage); diff --git a/src/amd/vulkan/radv_pipeline_compute.h b/src/amd/vulkan/radv_pipeline_compute.h index ecaf073b756..61adc788057 100644 --- a/src/amd/vulkan/radv_pipeline_compute.h +++ b/src/amd/vulkan/radv_pipeline_compute.h @@ -42,8 +42,9 @@ void radv_get_compute_shader_metadata(const struct radv_device *device, const st void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout, struct radv_shader *shader); -struct radv_shader_binary *radv_compile_cs(struct radv_device *device, struct radv_shader_stage *cs_stage, - bool keep_executable_info, bool keep_statistic_info, bool is_internal, +struct radv_shader_binary *radv_compile_cs(const struct radv_compiler_info *compiler_info, + struct radv_shader_stage *cs_stage, bool keep_executable_info, + bool keep_statistic_info, bool is_internal, struct radv_shader_debug_info *dbg); VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, diff --git a/src/amd/vulkan/radv_pipeline_graphics.c b/src/amd/vulkan/radv_pipeline_graphics.c index 5ef265c5ced..7a61709e6ff 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.c +++ b/src/amd/vulkan/radv_pipeline_graphics.c @@ -98,18 +98,17 @@ radv_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor } static unsigned -radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable, +radv_choose_spi_color_format(const struct radv_compiler_info *compiler_info, VkFormat vk_format, bool blend_enable, bool blend_need_alpha) { - const struct radv_physical_device *pdev = radv_device_physical(device); const struct util_format_description *desc = radv_format_description(vk_format); - bool use_rbplus = pdev->info.rbplus_allowed; + bool use_rbplus = compiler_info->hw.rbplus_allowed; struct ac_spi_color_formats formats = {0}; unsigned format, ntype, swap; - format = ac_get_cb_format(pdev->info.gfx_level, desc->format); + format = ac_get_cb_format(compiler_info->ac->gfx_level, desc->format); ntype = ac_get_cb_number_type(desc->format); - swap = ac_translate_colorswap(pdev->info.gfx_level, desc->format, false); + swap = ac_translate_colorswap(compiler_info->ac->gfx_level, desc->format, false); ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats); @@ -1464,9 +1463,8 @@ radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages, enum amd_g } struct radv_ps_epilog_key -radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state) +radv_generate_ps_epilog_key(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_state *state) { - const struct radv_physical_device *pdev = radv_device_physical(device); unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0, no_signed_zero = 0; struct radv_ps_epilog_key key; @@ -1484,7 +1482,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ } else { const bool blend_enable = (state->color_blend_enable >> i) & 0x1u; - cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i)); + cf = radv_choose_spi_color_format(compiler_info, fmt, blend_enable, state->need_src_alpha & (1 << i)); uint32_t 
comp_used = util_format_colormask(vk_format_description(fmt)); @@ -1530,9 +1528,9 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ state->alpha_to_coverage_via_mrtz); key.spi_shader_col_format = col_format; - key.color_is_int8 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int8 : 0; - key.color_is_int10 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int10 : 0; - key.enable_mrt_output_nan_fixup = pdev->cache_key.enable_mrt_output_nan_fixup ? is_float32 : 0; + key.color_is_int8 = compiler_info->ac->has_cb_lt16bit_int_clamp_bug ? is_int8 : 0; + key.color_is_int10 = compiler_info->ac->has_cb_lt16bit_int_clamp_bug ? is_int10 : 0; + key.enable_mrt_output_nan_fixup = compiler_info->cache_key->enable_mrt_output_nan_fixup ? is_float32 : 0; key.no_signed_zero = no_signed_zero; key.colors_written = state->colors_written; key.mrt0_is_dual_src = state->mrt0_is_dual_src && key.colors_needed & 0xf; @@ -1547,7 +1545,8 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ } static struct radv_ps_epilog_key -radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state) +radv_pipeline_generate_ps_epilog_key(const struct radv_compiler_info *compiler_info, + const struct vk_graphics_pipeline_state *state) { struct radv_ps_epilog_state ps_epilog = {0}; @@ -1605,14 +1604,14 @@ radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const str ps_epilog.color_attachment_mappings[i] = state->cal ? state->cal->color_map[i] : i; } - return radv_generate_ps_epilog_key(device, &ps_epilog); + return radv_generate_ps_epilog_key(compiler_info, &ps_epilog); } static struct radv_graphics_state_key -radv_generate_graphics_state_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state, +radv_generate_graphics_state_key(const struct radv_compiler_info *compiler_info, + const struct vk_graphics_pipeline_state *state, VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, uint32_t custom_blend_mode) { - const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_graphics_state_key key; memset(&key, 0, sizeof(key)); @@ -1646,7 +1645,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct key.vi.instance_rate_divisors[i] = state->vi->bindings[binding].divisor; /* vertex_attribute_strides is only needed to workaround GFX6/7 offset>=stride checks. */ - if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && pdev->info.gfx_level < GFX8) { + if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && compiler_info->ac->gfx_level < GFX8) { /* From the Vulkan spec 1.2.157: * * "If the bound pipeline state object was created with the @@ -1666,7 +1665,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct } const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info( - pdev->info.gfx_level, pdev->info.compiler_info.has_vtx_format_alpha_adjust_bug, format); + compiler_info->ac->gfx_level, compiler_info->ac->has_vtx_format_alpha_adjust_bug, format); unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size; /* If offset is misaligned, then the buffer offset must be too. Just skip updating @@ -1691,7 +1690,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct * alpha-to-one is enabled (alpha to MRTZ.a and one to MRT0.a). 
*/ key.ms.alpha_to_coverage_via_mrtz = - alpha_to_coverage_enabled && (pdev->info.gfx_level >= GFX11 || alpha_to_one_enabled); + alpha_to_coverage_enabled && (compiler_info->ac->gfx_level >= GFX11 || alpha_to_one_enabled); if (state->ms) { key.ms.sample_shading_enable = state->ms->sample_shading_enable; @@ -1711,29 +1710,29 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct } if (state->rs) { - if (pdev->info.gfx_level >= GFX10) + if (compiler_info->ac->gfx_level >= GFX10) key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_CULL_MODE)) key.rs.cull_mode = state->rs->cull_mode; } - key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(state); + key.ps.force_vrs_enabled = compiler_info->force_vrs_enabled && !radv_is_static_vrs_enabled(state); - if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) && pdev->info.compiler_info.has_vrs_frag_pos_z_bug) + if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) && compiler_info->ac->has_vrs_frag_pos_z_bug) key.adjust_frag_coord_z = true; if (radv_pipeline_needs_ps_epilog(state, lib_flags)) key.ps.has_epilog = true; - key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state); + key.ps.epilog = radv_pipeline_generate_ps_epilog_key(compiler_info, state); /* Alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also exported. * Though, when a PS epilog is needed and the MS state is NULL (with dynamic rendering), it's not * possible to know the info at compile time and MRTZ needs to be exported in the epilog. */ if (key.ps.has_epilog) { - if (pdev->info.gfx_level >= GFX11) { + if (compiler_info->ac->gfx_level >= GFX11) { key.ps.exports_mrtz_via_epilog = alpha_to_coverage_unknown; } else { key.ps.exports_mrtz_via_epilog = @@ -1744,7 +1743,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct key.dynamic_rasterization_samples = BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) || (!!(state->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms); - if (pdev->use_ngg) { + if (compiler_info->use_ngg) { VkShaderStageFlags ngg_stage; if (state->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT) { @@ -1767,7 +1766,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct key.enable_remove_point_size = true; } - if (device->vk.enabled_features.smoothLines) { + if (compiler_info->smooth_lines) { /* Make the line rasterization mode dynamic for smooth lines to conditionally enable the lowering at draw time. * This is because it's not possible to know if the graphics pipeline will draw lines at this point and it also * simplifies the implementation. 
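/* [Editor's sketch, not part of the patch] A plausible device-creation
 * counterpart: the refactor only works if every device/instance-derived value
 * consumed above is snapshotted into radv_compiler_info once, so the key
 * generation and compile paths never reach through radv_device again. The
 * helper name is hypothetical; each right-hand side is a field the removed
 * '-' lines in this patch used to read directly. */
static void
radv_compiler_info_init_sketch(struct radv_device *device)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_compiler_info *info = &device->compiler_info;

   info->cache_key = &pdev->cache_key;
   info->use_ngg = pdev->use_ngg;
   info->hw.address32_hi = pdev->info.address32_hi;
   info->hw.mesh_fast_launch_2 = pdev->info.mesh_fast_launch_2;
   info->force_vrs_enabled = device->force_vrs_enabled;
   info->smooth_lines = device->vk.enabled_features.smoothLines;
}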
@@ -1784,7 +1783,8 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct !(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT); } - key.dcc_decompress_gfx11 = pdev->info.gfx_level >= GFX11 && custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11; + key.dcc_decompress_gfx11 = + compiler_info->ac->gfx_level >= GFX11 && custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11; return key; } @@ -1794,6 +1794,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGr const struct vk_graphics_pipeline_state *state, VkGraphicsPipelineLibraryFlagBitsEXT lib_flags) { + const struct radv_compiler_info *compiler_info = &device->compiler_info; VkPipelineCreateFlags2 create_flags = vk_graphics_pipeline_create_flags(pCreateInfo); struct radv_graphics_pipeline_key key = {0}; uint32_t custom_blend_mode = 0; @@ -1804,7 +1805,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGr custom_blend_mode = radv_info->custom_blend_mode; } - key.gfx_state = radv_generate_graphics_state_key(device, state, lib_flags, custom_blend_mode); + key.gfx_state = radv_generate_graphics_state_key(compiler_info, state, lib_flags, custom_blend_mode); for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i]; @@ -1820,12 +1821,10 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGr } static void -radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages, +radv_fill_shader_info_ngg(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stages, VkShaderStageFlagBits active_nir_stages) { - const struct radv_physical_device *pdev = radv_device_physical(device); - - if (!pdev->cache_key.use_ngg) + if (!compiler_info->cache_key->use_ngg) return; if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) { @@ -1836,7 +1835,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage * stages[MESA_SHADER_MESH].info.is_ngg = true; } - if (pdev->info.gfx_level >= GFX11) { + if (compiler_info->ac->gfx_level >= GFX11) { if (stages[MESA_SHADER_GEOMETRY].nir) stages[MESA_SHADER_GEOMETRY].info.is_ngg = true; } else { @@ -1861,7 +1860,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage * } if ((last_vgt_stage && last_vgt_stage->nir->xfb_info) || - (pdev->cache_key.no_ngg_gs && stages[MESA_SHADER_GEOMETRY].nir)) { + (compiler_info->cache_key->no_ngg_gs && stages[MESA_SHADER_GEOMETRY].nir)) { /* NGG needs to be disabled on GFX10/GFX10.3 when: * - streamout is used because NGG streamout isn't supported * - NGG GS is explicitly disabled to work around performance issues @@ -1980,7 +1979,7 @@ radv_get_next_stage(mesa_shader_stage stage, VkShaderStageFlagBits active_nir_st } static void -radv_fill_shader_info(struct radv_device *device, const enum radv_pipeline_type pipeline_type, +radv_fill_shader_info(const struct radv_compiler_info *compiler_info, const enum radv_pipeline_type pipeline_type, const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages, VkShaderStageFlagBits active_nir_stages) { @@ -1991,23 +1990,22 @@ radv_fill_shader_info(struct radv_device *device, const enum radv_pipeline_type consider_force_vrs = radv_consider_force_vrs(gfx_state, &stages[i], &stages[MESA_SHADER_FRAGMENT]); } - radv_nir_shader_info_pass(device, stages[i].nir, &stages[i].layout, 
&stages[i].key, gfx_state, pipeline_type, - consider_force_vrs, &stages[i].info); + radv_nir_shader_info_pass(compiler_info, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state, + pipeline_type, consider_force_vrs, &stages[i].info); } - radv_nir_shader_info_link(device, gfx_state, stages); + radv_nir_shader_info_link(compiler_info, gfx_state, stages); } static void -radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages, +radv_declare_pipeline_args(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages, struct radv_shader_debug_info *debug) { - const struct radv_physical_device *pdev = radv_device_physical(device); - enum amd_gfx_level gfx_level = pdev->info.gfx_level; + enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level; if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) { - radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL, + radv_declare_shader_args(compiler_info, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL, MESA_SHADER_VERTEX, &stages[MESA_SHADER_TESS_CTRL].args, &debug[MESA_SHADER_TESS_CTRL]); stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs; stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask = @@ -2024,8 +2022,8 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) { mesa_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; - radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY, pre_stage, - &stages[MESA_SHADER_GEOMETRY].args, &debug[MESA_SHADER_GEOMETRY]); + radv_declare_shader_args(compiler_info, gfx_state, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY, + pre_stage, &stages[MESA_SHADER_GEOMETRY].args, &debug[MESA_SHADER_GEOMETRY]); stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs; stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask = stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask; @@ -2038,20 +2036,19 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage } u_foreach_bit (i, active_nir_stages) { - radv_declare_shader_args(device, gfx_state, &stages[i].info, i, MESA_SHADER_NONE, &stages[i].args, &debug[i]); + radv_declare_shader_args(compiler_info, gfx_state, &stages[i].info, i, MESA_SHADER_NONE, &stages[i].args, + &debug[i]); stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs; stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask; } } static struct radv_shader_binary * -radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache *cache, +radv_create_gs_copy_shader(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache, struct radv_shader_stage *gs_stage, const struct radv_graphics_state_key *gfx_state, bool keep_executable_info, bool keep_statistic_info, struct radv_shader_debug_info *gs_copy_debug) { - const struct radv_physical_device *pdev = radv_device_physical(device); - struct radv_instance *instance = radv_physical_device_instance(pdev); const struct radv_shader_info *gs_info = &gs_stage->info; nir_shader *nir = gs_stage->gs_copy_shader; @@ -2068,8 +2065,8 @@ 
radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache }, }; radv_nir_shader_info_init(gs_copy_stage.stage, MESA_SHADER_FRAGMENT, &gs_copy_stage.info); - radv_nir_shader_info_pass(device, nir, &gs_stage->layout, &gs_stage->key, gfx_state, RADV_PIPELINE_GRAPHICS, false, - &gs_copy_stage.info); + radv_nir_shader_info_pass(compiler_info, nir, &gs_stage->layout, &gs_stage->key, gfx_state, RADV_PIPELINE_GRAPHICS, + false, &gs_copy_stage.info); gs_copy_stage.info.wave_size = 64; /* Wave32 not supported. */ gs_copy_stage.info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */ gs_copy_stage.info.so = gs_info->so; @@ -2077,59 +2074,56 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache gs_copy_stage.info.force_vrs_per_vertex = gs_info->force_vrs_per_vertex; gs_copy_stage.info.type = RADV_SHADER_TYPE_GS_COPY; - radv_declare_shader_args(device, gfx_state, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE, + radv_declare_shader_args(compiler_info, gfx_state, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE, &gs_copy_stage.args, gs_copy_debug); gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs; gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask; NIR_PASS( _, nir, ac_nir_lower_intrinsics_to_args, &gs_copy_stage.args.ac, - &(ac_nir_lower_intrinsics_to_args_options){.gfx_level = pdev->info.gfx_level, - .has_ls_vgpr_init_bug = pdev->info.compiler_info.has_ls_vgpr_init_bug, + &(ac_nir_lower_intrinsics_to_args_options){.gfx_level = compiler_info->ac->gfx_level, + .has_ls_vgpr_init_bug = compiler_info->ac->has_ls_vgpr_init_bug, .hw_stage = AC_HW_VERTEX_SHADER, .wave_size = 64, .workgroup_size = 64, - .use_llvm = pdev->use_llvm}); - NIR_PASS(_, nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi); + .use_llvm = compiler_info->debug.use_llvm}); + NIR_PASS(_, nir, radv_nir_lower_abi, compiler_info->ac->gfx_level, &gs_copy_stage, gfx_state, compiler_info->hw.address32_hi); NIR_PASS(_, nir, ac_nir_lower_global_access); NIR_PASS(_, nir, nir_lower_int64); struct radv_graphics_pipeline_key key = {0}; - gs_copy_debug->dump_shader = radv_can_dump_shader(device, nir); + gs_copy_debug->dump_shader = radv_can_dump_shader(compiler_info, nir); if (gs_copy_debug->dump_shader) - simple_mtx_lock(&instance->shader_dump_mtx); + simple_mtx_lock(compiler_info->debug.shader_dump_mtx); - struct radv_shader_binary *gs_copy_binary = radv_shader_nir_to_asm(device, &gs_copy_stage, &nir, 1, &key.gfx_state, - keep_executable_info, keep_statistic_info); + struct radv_shader_binary *gs_copy_binary = radv_shader_nir_to_asm( + compiler_info, &gs_copy_stage, &nir, 1, &key.gfx_state, keep_executable_info, keep_statistic_info); char *nir_string = NULL; if (keep_executable_info || gs_copy_debug->dump_shader) - nir_string = radv_dump_nir_shaders(instance, &nir, 1); + nir_string = radv_dump_nir_shaders(compiler_info, &nir, 1); - radv_parse_binary_debug_info(device, gs_copy_binary, gs_copy_debug); + radv_parse_binary_debug_info(compiler_info, gs_copy_binary, gs_copy_debug); gs_copy_debug->nir_string = nir_string; gs_copy_debug->stages = 1 << MESA_SHADER_VERTEX; - radv_shader_dump_asm(device, gs_copy_debug, &gs_copy_stage.info); + radv_shader_dump_asm(compiler_info, gs_copy_debug, &gs_copy_stage.info); if (gs_copy_debug->dump_shader) - simple_mtx_unlock(&instance->shader_dump_mtx); + simple_mtx_unlock(compiler_info->debug.shader_dump_mtx); return 
gs_copy_binary; } static void -radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache, +radv_graphics_shaders_nir_to_asm(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache, struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state, bool keep_executable_info, bool keep_statistic_info, VkShaderStageFlagBits active_nir_stages, struct radv_shader_debug_info *debug, struct radv_shader_binary **binaries, struct radv_shader_debug_info *gs_copy_debug, struct radv_shader_binary **gs_copy_binary) { - const struct radv_physical_device *pdev = radv_device_physical(device); - struct radv_instance *instance = radv_physical_device_instance(pdev); - for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) { if (!(active_nir_stages & (1 << s))) continue; @@ -2138,7 +2132,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_ unsigned shader_count = 1; /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */ - if (pdev->info.gfx_level >= GFX9 && + if (compiler_info->ac->gfx_level >= GFX9 && ((s == MESA_SHADER_GEOMETRY && (active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) || (s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) { @@ -2158,12 +2152,12 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_ int64_t stage_start = os_time_get_nano(); for (unsigned i = 0; i < shader_count; ++i) - debug[s].dump_shader |= radv_can_dump_shader(device, nir_shaders[i]); + debug[s].dump_shader |= radv_can_dump_shader(compiler_info, nir_shaders[i]); - bool dump_nir = debug[s].dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR); + bool dump_nir = debug[s].dump_shader && compiler_info->debug.dump_nir; if (debug[s].dump_shader) { - simple_mtx_lock(&instance->shader_dump_mtx); + simple_mtx_lock(compiler_info->debug.shader_dump_mtx); if (dump_nir) { for (uint32_t i = 0; i < shader_count; i++) @@ -2171,23 +2165,23 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_ } } - binaries[s] = radv_shader_nir_to_asm(device, &stages[s], nir_shaders, shader_count, gfx_state, + binaries[s] = radv_shader_nir_to_asm(compiler_info, &stages[s], nir_shaders, shader_count, gfx_state, keep_executable_info, keep_statistic_info); /* Dump NIR after nir_to_asm, because ACO modifies it. 
*/ char *nir_string = NULL; if (keep_executable_info || debug[s].dump_shader) - nir_string = radv_dump_nir_shaders(instance, nir_shaders, shader_count); + nir_string = radv_dump_nir_shaders(compiler_info, nir_shaders, shader_count); - radv_parse_binary_debug_info(device, binaries[s], &debug[s]); + radv_parse_binary_debug_info(compiler_info, binaries[s], &debug[s]); debug[s].nir_string = nir_string; for (uint32_t i = 0; i < shader_count; i++) debug[s].stages |= 1 << nir_shaders[i]->info.stage; - radv_shader_dump_asm(device, &debug[s], &stages[s].info); + radv_shader_dump_asm(compiler_info, &debug[s], &stages[s].info); if (debug[s].dump_shader) - simple_mtx_unlock(&instance->shader_dump_mtx); + simple_mtx_unlock(compiler_info->debug.shader_dump_mtx); if (keep_executable_info && stages[s].spirv.size) { debug[s].spirv = malloc(stages[s].spirv.size); @@ -2196,7 +2190,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_ } if (s == MESA_SHADER_GEOMETRY && !stages[s].info.is_ngg) { - *gs_copy_binary = radv_create_gs_copy_shader(device, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state, + *gs_copy_binary = radv_create_gs_copy_shader(compiler_info, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state, keep_executable_info, keep_statistic_info, gs_copy_debug); } @@ -2402,16 +2396,15 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const VkGr } void -radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache, +radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache, struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state, bool keep_executable_info, bool keep_statistic_info, bool is_internal, struct radv_retained_shaders *retained_shaders, bool noop_fs, struct radv_shader_debug_info *debug, struct radv_shader_binary **binaries, struct radv_shader_debug_info *gs_copy_debug, struct radv_shader_binary **gs_copy_binary) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); - const bool nir_cache = instance->perftest_flags & RADV_PERFTEST_NIR_CACHE; + const bool nir_cache = compiler_info->enable_nir_cache; + for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { if (stages[s].stage == MESA_SHADER_NONE) continue; @@ -2428,12 +2421,12 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac if (nir_cache) { radv_hash_graphics_spirv_to_nir(key, &stages[s], &options); - stages[s].nir = radv_pipeline_cache_lookup_nir(device, cache, s, key); + stages[s].nir = radv_pipeline_cache_lookup_nir(compiler_info, cache, s, key); } if (!stages[s].nir) { - stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], &options, is_internal); + stages[s].nir = radv_shader_spirv_to_nir(compiler_info, &stages[s], &options, is_internal); if (nir_cache) - radv_pipeline_cache_insert_nir(device, cache, key, stages[s].nir); + radv_pipeline_cache_insert_nir(compiler_info, cache, key, stages[s].nir); } } @@ -2450,7 +2443,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac active_nir_stages |= mesa_to_vk_shader_stage(i); } - if (!pdev->info.mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir && + if (!compiler_info->hw.mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir && BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) { nir_shader *mesh = stages[MESA_SHADER_MESH].nir; 
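/* [Editor's sketch, not part of the patch] Usage pattern for the reworked
 * blake3-keyed NIR cache, condensed from radv_graphics_shaders_compile in this
 * file. The helper is hypothetical; the contract it relies on is visible in
 * radv_pipeline_cache.c above: passing cache == NULL falls back to
 * compiler_info->mem_cache, and a disabled cache makes lookups return NULL and
 * inserts no-op, so one unconditional code path works for all configurations. */
static nir_shader *
radv_get_stage_nir_sketch(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
                          struct radv_shader_stage *stage, const blake3_hash key, bool is_internal)
{
   /* Cheap lookup first; the key is hashed from the SPIR-V plus compile options. */
   nir_shader *nir = radv_pipeline_cache_lookup_nir(compiler_info, cache, stage->stage, key);
   if (!nir) {
      /* Miss: run the (expensive) SPIR-V to NIR translation and publish the result. */
      nir = radv_shader_spirv_to_nir(compiler_info, stage, NULL, is_internal);
      radv_pipeline_cache_insert_nir(compiler_info, cache, key, nir);
   }
   return nir;
}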
nir_shader *task = stages[MESA_SHADER_TASK].nir; @@ -2482,7 +2475,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac } /* Determine if shaders use NGG before linking because it's needed for some NIR passes. */ - radv_fill_shader_info_ngg(device, stages, active_nir_stages); + radv_fill_shader_info_ngg(compiler_info, stages, active_nir_stages); if (stages[MESA_SHADER_GEOMETRY].nir) { unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream; @@ -2624,7 +2617,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac } /* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */ - radv_graphics_shaders_link_varyings(stages, pdev->info.gfx_level); + radv_graphics_shaders_link_varyings(stages, compiler_info->ac->gfx_level); /* Optimize constant clip/cull distance after linking to operate on scalar I/O in the last * pre-raster stage. @@ -2642,14 +2635,14 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac stages[i].feedback.duration += os_time_get_nano() - stage_start; } - radv_fill_shader_info(device, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages); + radv_fill_shader_info(compiler_info, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages); - radv_declare_pipeline_args(device, stages, gfx_state, active_nir_stages, debug); + radv_declare_pipeline_args(compiler_info, stages, gfx_state, active_nir_stages, debug); radv_foreach_stage (i, active_nir_stages) { int64_t stage_start = os_time_get_nano(); - radv_postprocess_nir(device, gfx_state, &stages[i]); + radv_postprocess_nir(compiler_info, gfx_state, &stages[i]); stages[i].feedback.duration += os_time_get_nano() - stage_start; } @@ -2661,16 +2654,16 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac struct radv_shader_stage *stage = gs_stage ? gs_stage : es_stage; if ((gs_stage ? gs_stage : es_stage)->info.is_ngg) { - gfx10_get_ngg_info(device, &es_stage->info, gs_stage ? &gs_stage->info : NULL, &stage->info.ngg_info); + gfx10_get_ngg_info(compiler_info, &es_stage->info, gs_stage ? &gs_stage->info : NULL, &stage->info.ngg_info); stage->info.nir_shared_size = stage->info.ngg_info.lds_size; } } if (stages[MESA_SHADER_GEOMETRY].nir && !stages[MESA_SHADER_GEOMETRY].info.is_ngg) - radv_get_legacy_gs_info(device, NULL, &stages[MESA_SHADER_GEOMETRY].info); + radv_get_legacy_gs_info(compiler_info, NULL, &stages[MESA_SHADER_GEOMETRY].info); /* Compile NIR shaders to AMD assembly. 
*/ - radv_graphics_shaders_nir_to_asm(device, cache, stages, gfx_state, keep_executable_info, keep_statistic_info, + radv_graphics_shaders_nir_to_asm(compiler_info, cache, stages, gfx_state, keep_executable_info, keep_statistic_info, active_nir_stages, debug, binaries, gs_copy_debug, gs_copy_binary); } @@ -2835,10 +2828,11 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const Vk const struct radv_graphics_pipeline_state *gfx_state, struct radv_device *device, struct vk_pipeline_cache *cache, bool fast_linking_enabled) { + const struct radv_compiler_info *compiler_info = &device->compiler_info; struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL}; struct radv_shader_binary *gs_copy_binary = NULL; - bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags); - bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags); + bool keep_executable_info = radv_pipeline_capture_shaders(compiler_info, pipeline->base.create_flags); + bool keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, pipeline->base.create_flags); bool skip_shaders_cache = radv_pipeline_skip_shaders_cache(device, &pipeline->base); struct radv_shader_stage *stages = gfx_state->stages; const VkPipelineCreationFeedbackCreateInfo *creation_feedback = @@ -2918,7 +2912,7 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const Vk struct radv_shader_debug_info debug[MESA_VULKAN_SHADER_STAGES] = {0}; struct radv_shader_debug_info gs_copy_debug = {0}; - radv_graphics_shaders_compile(device, cache, stages, &gfx_state->key.gfx_state, keep_executable_info, + radv_graphics_shaders_compile(compiler_info, cache, stages, &gfx_state->key.gfx_state, keep_executable_info, keep_statistic_info, pipeline->base.is_internal, retained_shaders, noop_fs, debug, binaries, &gs_copy_debug, &gs_copy_binary); radv_graphics_shaders_create(device, cache, skip_shaders_cache, pipeline->base.shaders, binaries, debug, @@ -2932,7 +2926,7 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const Vk for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { free(binaries[i]); if (stages[i].nir) { - if (radv_can_dump_shader_stats(device, stages[i].nir) && pipeline->base.shaders[i]) { + if (radv_can_dump_shader_stats(&device->compiler_info, stages[i].nir) && pipeline->base.shaders[i]) { radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[i], stderr); } } diff --git a/src/amd/vulkan/radv_pipeline_graphics.h b/src/amd/vulkan/radv_pipeline_graphics.h index 0f6a14273e9..7979ea78495 100644 --- a/src/amd/vulkan/radv_pipeline_graphics.h +++ b/src/amd/vulkan/radv_pipeline_graphics.h @@ -651,10 +651,10 @@ struct radv_ps_epilog_state { uint8_t need_src_alpha; }; -struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device *device, +struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_state *state); -void radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache, +void radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache, struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state, bool keep_executable_info, bool keep_statistic_info, bool is_internal, struct radv_retained_shaders *retained_shaders, bool noop_fs, diff --git a/src/amd/vulkan/radv_pipeline_rt.c 
b/src/amd/vulkan/radv_pipeline_rt.c index 1b57d64e60f..d1ab88c6aaf 100644 --- a/src/amd/vulkan/radv_pipeline_rt.c +++ b/src/amd/vulkan/radv_pipeline_rt.c @@ -374,16 +374,13 @@ move_rt_instructions(nir_shader *shader) } static void -radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, +radv_rt_nir_to_asm(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline, enum radv_rt_lowering_mode mode, struct radv_shader_stage *stage, uint32_t *payload_size, uint32_t *hit_attrib_size, struct radv_ray_tracing_stage_info *stage_info, const struct radv_ray_tracing_stage_info *traversal_stage_info, bool has_position_fetch, struct radv_shader_binary **binary, bool keep_executable_info, bool keep_statistic_info, struct radv_shader_debug_info *debug) { - struct radv_physical_device *pdev = radv_device_physical(device); - struct radv_instance *instance = radv_physical_device_instance(pdev); - switch (mode) { case RADV_RT_LOWERING_MODE_MONOLITHIC: radv_nir_lower_rt_io_monolithic(stage->nir); @@ -399,11 +396,11 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline /* Gather shader info. */ nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir)); radv_nir_shader_info_init(stage->stage, MESA_SHADER_NONE, &stage->info); - radv_nir_shader_info_pass(device, stage->nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING, false, - &stage->info); + radv_nir_shader_info_pass(compiler_info, stage->nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING, + false, &stage->info); /* Declare shader arguments. */ - radv_declare_shader_args(device, NULL, &stage->info, stage->stage, MESA_SHADER_NONE, &stage->args, debug); + radv_declare_shader_args(compiler_info, NULL, &stage->info, stage->stage, MESA_SHADER_NONE, &stage->args, debug); stage->info.user_sgprs_locs = stage->args.user_sgprs_locs; stage->info.inline_push_constant_mask = stage->args.ac.inline_push_const_mask; @@ -427,7 +424,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline .stack_alignment = 16, .localized_loads = true, .vectorizer_callback = ac_nir_mem_vectorize_callback, - .vectorizer_data = &(struct ac_nir_config){pdev->info.gfx_level, !pdev->use_llvm}, + .vectorizer_data = &(struct ac_nir_config){compiler_info->ac->gfx_level, !compiler_info->debug.use_llvm}, }; nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, mem_ctx); } @@ -449,27 +446,27 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline switch (mode) { case RADV_RT_LOWERING_MODE_MONOLITHIC: assert(num_shaders == 1); - radv_nir_lower_rt_abi_monolithic(temp_stage.nir, device, pipeline); + radv_nir_lower_rt_abi_monolithic(temp_stage.nir, compiler_info, pipeline); break; case RADV_RT_LOWERING_MODE_CPS: - radv_nir_lower_rt_abi_cps(temp_stage.nir, &stage->info, i > 0, device, pipeline, has_position_fetch, + radv_nir_lower_rt_abi_cps(temp_stage.nir, &stage->info, i > 0, compiler_info, pipeline, has_position_fetch, traversal_stage_info); break; case RADV_RT_LOWERING_MODE_FUNCTION_CALLS: assert(num_shaders == 1); - radv_nir_lower_rt_abi_functions(temp_stage.nir, &temp_stage.info, *payload_size, *hit_attrib_size, device, - pipeline); + radv_nir_lower_rt_abi_functions(temp_stage.nir, &temp_stage.info, *payload_size, *hit_attrib_size, + compiler_info, pipeline); break; } /* Info might be out-of-date after inlining in radv_nir_lower_rt_abi(). 
*/ nir_shader_gather_info(temp_stage.nir, radv_get_rt_shader_entrypoint(temp_stage.nir)); - radv_nir_shader_info_pass(device, temp_stage.nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING, - false, &stage->info); + radv_nir_shader_info_pass(compiler_info, temp_stage.nir, &stage->layout, &stage->key, NULL, + RADV_PIPELINE_RAY_TRACING, false, &stage->info); radv_optimize_nir(temp_stage.nir, temp_stage.key.optimisations_disabled); - radv_postprocess_nir(device, NULL, &temp_stage); + radv_postprocess_nir(compiler_info, NULL, &temp_stage); NIR_PASS(_, stage->nir, radv_nir_lower_call_abi, stage->info.wave_size); NIR_PASS(_, stage->nir, nir_lower_global_vars_to_local); @@ -484,11 +481,11 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline radv_gather_unused_args(stage_info, temp_stage.nir); } - debug->dump_shader = radv_can_dump_shader(device, stage->nir); - bool dump_nir = debug->dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR); + debug->dump_shader = radv_can_dump_shader(compiler_info, stage->nir); + bool dump_nir = debug->dump_shader && compiler_info->debug.dump_nir; if (debug->dump_shader) { - simple_mtx_lock(&instance->shader_dump_mtx); + simple_mtx_lock(compiler_info->debug.shader_dump_mtx); if (dump_nir) { for (uint32_t i = 0; i < num_shaders; i++) @@ -497,17 +494,17 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline } /* Compile NIR shader to AMD assembly. */ - *binary = - radv_shader_nir_to_asm(device, stage, shaders, num_shaders, NULL, keep_executable_info, keep_statistic_info); + *binary = radv_shader_nir_to_asm(compiler_info, stage, shaders, num_shaders, NULL, keep_executable_info, + keep_statistic_info); /* Dump NIR after nir_to_asm, because ACO modifies it. 
*/ if (keep_executable_info || debug->dump_shader) - debug->nir_string = radv_dump_nir_shaders(instance, shaders, num_shaders); + debug->nir_string = radv_dump_nir_shaders(compiler_info, shaders, num_shaders); - radv_parse_binary_debug_info(device, *binary, debug); + radv_parse_binary_debug_info(compiler_info, *binary, debug); debug->stages = 1 << shaders[0]->info.stage; - radv_shader_dump_asm(device, debug, &stage->info); + radv_shader_dump_asm(compiler_info, debug, &stage->info); if (keep_executable_info && stage->spirv.size) { debug->spirv = malloc(stage->spirv.size); @@ -516,7 +513,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline } if (debug->dump_shader) - simple_mtx_unlock(&instance->shader_dump_mtx); + simple_mtx_unlock(compiler_info->debug.shader_dump_mtx); ralloc_free(mem_ctx); } @@ -530,16 +527,18 @@ radv_rt_compile_nir(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_serialized_shader_arena_block *replay_block, bool skip_shaders_cache, bool has_position_fetch, struct radv_shader **out_shader) { - bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags); - bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags); + const struct radv_compiler_info *compiler_info = &device->compiler_info; + bool keep_executable_info = radv_pipeline_capture_shaders(compiler_info, pipeline->base.base.create_flags); + bool keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, pipeline->base.base.create_flags); bool replayable = (pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR) && !radv_is_traversal_shader(stage->nir); struct radv_shader_binary *binary; struct radv_shader_debug_info debug = {0}; - radv_rt_nir_to_asm(device, pipeline, mode, stage, payload_size, hit_attrib_size, stage_info, traversal_stage_info, - has_position_fetch, &binary, keep_executable_info, keep_statistic_info, &debug); + radv_rt_nir_to_asm(compiler_info, pipeline, mode, stage, payload_size, hit_attrib_size, stage_info, + traversal_stage_info, has_position_fetch, &binary, keep_executable_info, keep_statistic_info, + &debug); struct radv_shader *shader; if (replay_block || replayable) { @@ -559,7 +558,7 @@ radv_rt_compile_nir(struct radv_device *device, struct vk_pipeline_cache *cache, *out_shader = shader; - if (radv_can_dump_shader_stats(device, stage->nir)) + if (radv_can_dump_shader_stats(compiler_info, stage->nir)) radv_dump_shader_stats(device, &pipeline->base.base, shader, stderr); return shader ? 
VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY; @@ -653,10 +652,10 @@ radv_ray_tracing_stage_nir_always_needed(struct radv_ray_tracing_stage *stage) } static void -radv_rt_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *stage, uint32_t *payload_size, - uint32_t *hit_attrib_size, struct radv_ray_tracing_stage_info *info) +radv_rt_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage, + uint32_t *payload_size, uint32_t *hit_attrib_size, struct radv_ray_tracing_stage_info *info) { - stage->nir = radv_shader_spirv_to_nir(device, stage, NULL, false); + stage->nir = radv_shader_spirv_to_nir(compiler_info, stage, NULL, false); nir_foreach_variable_with_modes (var, stage->nir, nir_var_ray_hit_attrib) { unsigned size, alignment; @@ -729,7 +728,7 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca &stage_keys[s], stage); /* precompile the shader */ - radv_rt_spirv_to_nir(device, stage, &payload_size, &hit_attrib_size, &rt_stages[i].info); + radv_rt_spirv_to_nir(&device->compiler_info, stage, &payload_size, &hit_attrib_size, &rt_stages[i].info); stage->feedback.duration = os_time_get_nano() - stage_start; } @@ -854,12 +853,13 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca if (ahit_idx == VK_SHADER_UNUSED_KHR && isec_idx == VK_SHADER_UNUSED_KHR) continue; + const struct radv_compiler_info *compiler_info = &device->compiler_info; nir_shader *ahit = NULL; nir_shader *isec = NULL; if (ahit_idx != VK_SHADER_UNUSED_KHR) - ahit = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[ahit_idx].nir); + ahit = radv_pipeline_cache_handle_to_nir(compiler_info, pipeline->stages[ahit_idx].nir); if (isec_idx != VK_SHADER_UNUSED_KHR) - isec = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[isec_idx].nir); + isec = radv_pipeline_cache_handle_to_nir(compiler_info, pipeline->stages[isec_idx].nir); nir_shader *final_shader; if (isec) { @@ -934,8 +934,8 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca preprocess = NULL; /* create traversal shader */ - nir_shader *traversal_nir = - radv_build_traversal_shader(device, pipeline, &traversal_info, preprocess, payload_size, hit_attrib_size); + nir_shader *traversal_nir = radv_build_traversal_shader(&device->compiler_info, pipeline, &traversal_info, + preprocess, payload_size, hit_attrib_size); struct radv_shader_stage traversal_stage = { .stage = MESA_SHADER_INTERSECTION, .nir = traversal_nir, @@ -1049,10 +1049,10 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline * struct radv_shader_stage prolog_stage = {0}; struct radv_shader_debug_info debug = {0}; - radv_build_rt_prolog(device, &prolog_stage, uses_descriptor_heap, &debug); + radv_build_rt_prolog(&device->compiler_info, &prolog_stage, uses_descriptor_heap, &debug); prolog_stage.nir->options = &pdev->nir_options[MESA_SHADER_COMPUTE]; radv_optimize_nir(prolog_stage.nir, false); - radv_postprocess_nir(device, NULL, &prolog_stage); + radv_postprocess_nir(&device->compiler_info, NULL, &prolog_stage); NIR_PASS(_, prolog_stage.nir, radv_nir_lower_call_abi, prolog_stage.info.wave_size); NIR_PASS(_, prolog_stage.nir, nir_lower_global_vars_to_local); diff --git a/src/amd/vulkan/radv_sampler.c b/src/amd/vulkan/radv_sampler.c index 8ba0e4c27c8..52f68947e00 100644 --- a/src/amd/vulkan/radv_sampler.c +++ b/src/amd/vulkan/radv_sampler.c @@ -132,10 +132,10 @@ radv_tex_filter_mode(VkSamplerReductionMode mode) } static uint32_t 
-radv_get_max_anisotropy(const struct radv_device *device, const struct vk_sampler_state *sampler_state) +radv_get_max_anisotropy(const struct radv_compiler_info *compiler_info, const struct vk_sampler_state *sampler_state) { - if (device->force_aniso >= 0) - return device->force_aniso; + if (compiler_info->force_aniso >= 0) + return compiler_info->force_aniso; if (sampler_state->anisotropy_enable && sampler_state->max_anisotropy > 1.0f) return (uint32_t)sampler_state->max_anisotropy; @@ -199,19 +199,17 @@ radv_unregister_border_color(struct radv_device *device, uint32_t index) } void -radv_make_sampler_descriptor(const struct radv_device *device, const struct vk_sampler_state *sampler_state, - uint32_t *desc) +radv_make_sampler_descriptor(const struct radv_compiler_info *compiler_info, + const struct vk_sampler_state *sampler_state, uint32_t *desc) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); - const uint32_t max_aniso = radv_get_max_anisotropy(device, sampler_state); + const uint32_t max_aniso = radv_get_max_anisotropy(compiler_info, sampler_state); const uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso); const unsigned filter_mode = radv_tex_filter_mode(sampler_state->reduction_mode); unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; const bool trunc_coord = ((sampler_state->min_filter == VK_FILTER_NEAREST && sampler_state->mag_filter == VK_FILTER_NEAREST) || - pdev->info.compiler_info.conformant_trunc_coord) && - !instance->drirc.debug.disable_trunc_coord; + compiler_info->ac->conformant_trunc_coord) && + !compiler_info->cache_key->disable_trunc_coord; const VkBorderColor border_color = radv_get_border_color(sampler_state); const bool disable_cube_wrap = sampler_state->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT; @@ -238,12 +236,12 @@ radv_make_sampler_descriptor(const struct radv_device *device, const struct vk_s .min_lod = sampler_state->min_lod, .max_lod = sampler_state->max_lod, .lod_bias = sampler_state->mip_lod_bias, - .aniso_single_level = !instance->drirc.debug.disable_aniso_single_level, + .aniso_single_level = !compiler_info->cache_key->disable_aniso_single_level, .border_color_type = radv_tex_bordercolor(border_color), .border_color_ptr = border_color_ptr, }; - ac_build_sampler_descriptor(pdev->info.gfx_level, &ac_state, desc); + ac_build_sampler_descriptor(compiler_info->ac->gfx_level, &ac_state, desc); } VkResult @@ -285,7 +283,7 @@ radv_sampler_init(struct radv_device *device, struct radv_sampler *sampler, cons } } - radv_make_sampler_descriptor(device, &sampler_state, sampler->state); + radv_make_sampler_descriptor(&device->compiler_info, &sampler_state, sampler->state); return VK_SUCCESS; } diff --git a/src/amd/vulkan/radv_sampler.h b/src/amd/vulkan/radv_sampler.h index dbb8abad71b..b58e8e73ae7 100644 --- a/src/amd/vulkan/radv_sampler.h +++ b/src/amd/vulkan/radv_sampler.h @@ -14,6 +14,7 @@ #include "vk_sampler.h" struct radv_device; +struct radv_compiler_info; struct radv_sampler { struct vk_sampler vk; @@ -28,7 +29,7 @@ VkResult radv_sampler_init(struct radv_device *device, struct radv_sampler *samp const VkSamplerCreateInfo *pCreateInfo); void radv_sampler_finish(struct radv_device *device, struct radv_sampler *sampler); -void radv_make_sampler_descriptor(const struct radv_device *device, const struct vk_sampler_state *sampler_state, - uint32_t *desc); +void radv_make_sampler_descriptor(const struct radv_compiler_info 
*compiler_info, + const struct vk_sampler_state *sampler_state, uint32_t *desc); #endif /* RADV_SAMPLER_H */ diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index bc7b0a44a33..52343a9c66e 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -150,28 +150,22 @@ radv_dump_flag_for_stage(const mesa_shader_stage stage) } bool -radv_can_dump_shader(struct radv_device *device, nir_shader *nir) +radv_can_dump_shader(const struct radv_compiler_info *compiler_info, nir_shader *nir) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); - if (is_meta_shader(nir) && nir->info.stage != MESA_SHADER_INTERSECTION) - return instance->debug_flags & RADV_DEBUG_DUMP_META_SHADERS; + return compiler_info->debug.dump_meta_shaders; if (!nir) return false; - return instance->debug_flags & radv_dump_flag_for_stage(nir->info.stage); + return compiler_info->debug.dump_shaders & mesa_to_vk_shader_stage(nir->info.stage); } bool -radv_can_dump_shader_stats(struct radv_device *device, nir_shader *nir) +radv_can_dump_shader_stats(const struct radv_compiler_info *compiler_info, nir_shader *nir) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); - /* Only dump non-meta shader stats. */ - return instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS && !is_meta_shader(nir); + return compiler_info->debug.dump_shader_stats && !is_meta_shader(nir); } void @@ -359,25 +353,22 @@ radv_spirv_nir_debug(void *private_data, enum nir_spirv_debug_level level, size_ } static void -radv_shader_choose_subgroup_size(struct radv_device *device, nir_shader *nir, +radv_shader_choose_subgroup_size(const struct radv_compiler_info *compiler_info, nir_shader *nir, const struct radv_shader_stage_key *stage_key, unsigned spirv_version) { - const struct radv_physical_device *pdev = radv_device_physical(device); - VkPipelineShaderStageRequiredSubgroupSizeCreateInfo rss_info = { .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO, .requiredSubgroupSize = stage_key->subgroup_required_size * 32, }; /* Do not allow for the SPIR-V 1.6 varying subgroup size rules. */ - if (pdev->cache_key.no_implicit_varying_subgroup_size) + if (compiler_info->cache_key->no_implicit_varying_subgroup_size) spirv_version = 0x10000; - struct vk_properties *properties = &device->vk.physical->properties; - - vk_set_subgroup_size(nir, properties->subgroupSize, properties->minSubgroupSize, properties->maxSubgroupSize, - spirv_version, rss_info.requiredSubgroupSize ? &rss_info : NULL, - stage_key->subgroup_allow_varying, stage_key->subgroup_require_full); + vk_set_subgroup_size(nir, compiler_info->subgroup_size, compiler_info->min_subgroup_size, + compiler_info->max_subgroup_size, spirv_version, + rss_info.requiredSubgroupSize ? 
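/* requiredSubgroupSize is 0 unless the stage key demanded a size, so NULL is passed in the common case */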
&rss_info : NULL, stage_key->subgroup_allow_varying, + stage_key->subgroup_require_full); nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); @@ -391,11 +382,11 @@ radv_shader_choose_subgroup_size(struct radv_device *device, nir_shader *nir, unsigned default_wave_size; if (nir->info.ray_queries) - default_wave_size = pdev->rt_wave_size; + default_wave_size = compiler_info->rt_wave_size; else if (nir->info.stage == MESA_SHADER_MESH) - default_wave_size = pdev->ge_wave_size; + default_wave_size = compiler_info->ge_wave_size; else - default_wave_size = pdev->cs_wave_size; + default_wave_size = compiler_info->cs_wave_size; /* Games don't always request full subgroups when they should, which can cause bugs if cswave32 * is enabled. Furthermore, if cooperative matrices or subgroup info are used, we can't transparently change @@ -415,18 +406,18 @@ radv_shader_choose_subgroup_size(struct radv_device *device, nir_shader *nir, else wave_size = default_wave_size; } else if (nir->info.stage == MESA_SHADER_GEOMETRY && - (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX10_3)) { + (compiler_info->ac->gfx_level >= GFX10 && compiler_info->ac->gfx_level <= GFX10_3)) { /* Legacy GS doesn't support wave32. */ wave_size = 64; } else if (nir->info.stage == MESA_SHADER_FRAGMENT) { if (nir->info.ray_queries) - wave_size = pdev->rt_wave_size; + wave_size = compiler_info->rt_wave_size; else - wave_size = pdev->ps_wave_size; + wave_size = compiler_info->ps_wave_size; } else if (mesa_shader_stage_is_rt(nir->info.stage)) { - wave_size = pdev->rt_wave_size; + wave_size = compiler_info->rt_wave_size; } else { - wave_size = pdev->ge_wave_size; + wave_size = compiler_info->ge_wave_size; } if (nir->info.api_subgroup_size == 0) { @@ -438,7 +429,7 @@ radv_shader_choose_subgroup_size(struct radv_device *device, nir_shader *nir, /* We might still decide to use ngg later. */ if (nir->info.stage == MESA_SHADER_GEOMETRY) - nir->info.min_subgroup_size = pdev->ge_wave_size; + nir->info.min_subgroup_size = compiler_info->ge_wave_size; else nir->info.min_subgroup_size = wave_size; } @@ -465,17 +456,15 @@ ycbcr_conversion_lookup(const void *data, uint32_t set, uint32_t binding, uint32 } nir_shader * -radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *stage, +radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage, const struct radv_spirv_to_nir_options *options, bool is_internal) { - const struct radv_physical_device *pdev = radv_device_physical(device); - struct radv_instance *instance = radv_physical_device_instance(pdev); struct vk_sampler_state_array embedded_samplers; nir_shader *nir; bool progress; if (stage->internal_nir) { - stage->internal_nir->options = &pdev->nir_options[stage->internal_nir->info.stage]; + stage->internal_nir->options = &compiler_info->nir_options[stage->internal_nir->info.stage]; /* Some things such as our meta clear/blit code will give us a NIR * shader directly. 
In that case, we just ignore the SPIR-V entirely @@ -485,29 +474,28 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s nir_validate_shader(nir, "in internal shader"); assert(exec_list_length(&nir->functions) == 1); - radv_shader_choose_subgroup_size(device, nir, &stage->key, 0); + radv_shader_choose_subgroup_size(compiler_info, nir, &stage->key, 0); } else { uint32_t *spirv = (uint32_t *)stage->spirv.data; assert(stage->spirv.size % 4 == 0); - if (instance->debug_flags & RADV_DEBUG_DUMP_SPIRV) { - const uint64_t dump_flags = - is_internal ? RADV_DEBUG_DUMP_META_SHADERS : radv_dump_flag_for_stage(stage->stage); - if (instance->debug_flags & dump_flags) - spirv_print_asm(stderr, (const uint32_t *)stage->spirv.data, stage->spirv.size / 4); - } + const bool dump_spirv = compiler_info->debug.dump_spirv && + (is_internal ? compiler_info->debug.dump_meta_shaders + : compiler_info->debug.dump_shaders & mesa_to_vk_shader_stage(stage->stage)); + if (dump_spirv) + spirv_print_asm(stderr, (const uint32_t *)stage->spirv.data, stage->spirv.size / 4); + struct nir_spirv_specialization *spec = vk_spec_info_to_nir_spirv(stage->spec_info); struct radv_shader_debug_data spirv_debug_data = { - .debug_report = &instance->vk.debug_report, + .debug_report = compiler_info->debug.debug_report, .object = stage->spirv.object, }; - const struct spirv_capabilities spirv_caps = vk_physical_device_get_spirv_capabilities(device->vk.physical); const struct spirv_to_nir_options spirv_options = { .amd_gcn_shader = true, .amd_shader_ballot = true, .amd_shader_explicit_vertex_parameter = true, .amd_trinary_minmax = true, - .capabilities = &spirv_caps, + .capabilities = &compiler_info->spirv_caps, .ubo_addr_format = nir_address_format_vec2_index_32bit_offset, .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset, .phys_ssbo_addr_format = nir_address_format_64bit_global, @@ -521,22 +509,22 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s }, .workarounds = { - .force_tex_non_uniform = pdev->cache_key.tex_non_uniform, - .force_ssbo_non_uniform = pdev->cache_key.ssbo_non_uniform, - .lower_terminate_to_discard = pdev->cache_key.lower_terminate_to_discard, + .force_tex_non_uniform = compiler_info->cache_key->tex_non_uniform, + .force_ssbo_non_uniform = compiler_info->cache_key->ssbo_non_uniform, + .lower_terminate_to_discard = compiler_info->cache_key->lower_terminate_to_discard, }, - .emit_debug_break = !!device->trap_handler_shader, - .debug_info = !!(instance->debug_flags & RADV_DEBUG_NIR_DEBUG_INFO), - .printf = !!device->debug_nir.printf.buffer_addr, - .sampler_descriptor_size = pdev->vk.properties.samplerDescriptorSize, - .sampler_descriptor_alignment = pdev->vk.properties.samplerDescriptorAlignment, - .image_descriptor_size = pdev->vk.properties.imageDescriptorSize, - .image_descriptor_alignment = pdev->vk.properties.imageDescriptorAlignment, - .buffer_descriptor_size = pdev->vk.properties.bufferDescriptorSize, - .buffer_descriptor_alignment = pdev->vk.properties.bufferDescriptorAlignment, + .emit_debug_break = compiler_info->debug.trap_enabled, + .debug_info = compiler_info->debug.nir_debug_info, + .printf = compiler_info->debug.printf_enabled, + .sampler_descriptor_size = compiler_info->sampler_descriptor_size, + .sampler_descriptor_alignment = compiler_info->sampler_descriptor_alignment, + .image_descriptor_size = compiler_info->image_descriptor_size, + .image_descriptor_alignment = compiler_info->image_descriptor_alignment, + 
.buffer_descriptor_size = compiler_info->buffer_descriptor_size, + .buffer_descriptor_alignment = compiler_info->buffer_descriptor_alignment, }; nir = spirv_to_nir(spirv, stage->spirv.size / 4, spec, stage->stage, stage->entrypoint, &spirv_options, - &pdev->nir_options[stage->stage]); + &compiler_info->nir_options[stage->stage]); nir->info.internal |= is_internal; assert(nir->info.stage == stage->stage); nir_validate_shader(nir, "after spirv_to_nir"); @@ -577,7 +565,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s */ NIR_PASS(_, nir, nir_lower_variable_initializers, ~0); - radv_shader_choose_subgroup_size(device, nir, &stage->key, vk_spirv_version(spirv, stage->spirv.size)); + radv_shader_choose_subgroup_size(compiler_info, nir, &stage->key, vk_spirv_version(spirv, stage->spirv.size)); progress = false; NIR_PASS(progress, nir, nir_lower_cooperative_matrix_flexible_dimensions, 16, 16, 16); @@ -587,7 +575,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp | nir_var_shader_temp, NULL); } - NIR_PASS(progress, nir, radv_nir_lower_cooperative_matrix, pdev->info.gfx_level, nir->info.max_subgroup_size); + NIR_PASS(progress, nir, radv_nir_lower_cooperative_matrix, compiler_info->ac->gfx_level, + nir->info.max_subgroup_size); if (progress) { NIR_PASS(_, nir, nir_opt_dce); NIR_PASS(progress, nir, nir_inline_functions); @@ -625,7 +614,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s NIR_PASS(_, nir, nir_lower_vars_to_ssa); - NIR_PASS(_, nir, nir_propagate_invariant, pdev->cache_key.invariant_geom); + NIR_PASS(_, nir, nir_propagate_invariant, compiler_info->cache_key->invariant_geom); nir_gather_clip_cull_distance_sizes_from_vars(nir); NIR_PASS(_, nir, nir_merge_clip_cull_distance_vars); @@ -636,7 +625,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s nir_lower_doubles_options lower_doubles = nir->options->lower_doubles_options; - if (pdev->info.gfx_level == GFX6) { + if (compiler_info->ac->gfx_level == GFX6) { /* GFX6 doesn't support v_floor_f64 and the precision * of v_fract_f64 which is used to implement 64-bit * floor is less than what Vulkan requires. 
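For orientation between hunks: the accesses in this patch (compiler_info->ac, ->hw, ->cache_key, ->debug, plus flat members such as the wave sizes and descriptor sizing) imply roughly the following shape for the new struct. This is an inferred sketch only; names such as ac_compiler_info and radv_physical_device_cache_key and the exact grouping are assumptions, and the authoritative definition is introduced elsewhere in the series, not in these hunks.

/* Illustrative sketch: field names inferred from the accesses in this
 * patch, not the actual definition; includes omitted. */
struct radv_compiler_info {
   /* Compiler-facing hardware description shared with common AMD code
    * (gfx_level, has_fma_mix, conformant_trunc_coord,
    * local_invocation_ids_packed, has_image_load_dcc_bug, ...). */
   const struct ac_compiler_info *ac; /* type name assumed */

   /* Hardware facts previously read from pdev->info. */
   struct {
      enum radeon_family family;
      uint32_t address32_hi;
      uint32_t lds_size_per_workgroup;
      bool mesh_fast_launch_2;
      bool has_dedicated_vram;
      bool has_cs_regalloc_hang_bug;
   } hw;

   /* Driconf/debug toggles previously read from pdev->cache_key. */
   const struct radv_physical_device_cache_key *cache_key; /* type name assumed */

   /* Instance/device debug state, replacing the former
    * radv_device_physical()/radv_physical_device_instance() chase. */
   struct {
      VkShaderStageFlags dump_shaders; /* per-stage mask */
      bool dump_meta_shaders, dump_spirv, dump_nir, dump_asm;
      bool dump_backend_ir, dump_preopt_ir, dump_shader_stats;
      bool check_ir, use_llvm, nir_debug_info;
      bool trap_enabled, printf_enabled;
      uint32_t trap_excp_flags;
      simple_mtx_t *shader_dump_mtx;
      struct vk_debug_report_instance *debug_report;
      struct radv_debug_nir *debug_nir;
   } debug;

   /* Wave and subgroup sizes previously read from the physical device. */
   unsigned cs_wave_size, ps_wave_size, ge_wave_size, rt_wave_size;
   uint32_t subgroup_size, min_subgroup_size, max_subgroup_size;

   /* Descriptor sizing previously read from pdev->vk.properties. */
   uint32_t sampler_descriptor_size, sampler_descriptor_alignment;
   uint32_t image_descriptor_size, image_descriptor_alignment;
   uint32_t buffer_descriptor_size, buffer_descriptor_alignment;
   uint32_t sampled_image_desc_size;
   uint32_t combined_image_sampler_desc_size;
   uint32_t combined_image_sampler_offset;

   /* Remaining device state that leaks into codegen. */
   struct spirv_capabilities spirv_caps;
   const nir_shader_compiler_options *nir_options; /* base of a per-stage array */
   int force_aniso;
   bool use_fmask, use_ngg_streamout;
   bool robust_buffer_access, load_grid_size_from_user_sgpr;
};

With all of this gathered in one const struct, every signature change in this patch reduces to swapping struct radv_device * for a single const struct radv_compiler_info *, as the surrounding hunks show.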
@@ -650,14 +639,14 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s } if (nir->info.uses_printf) - NIR_PASS(_, nir, radv_nir_lower_printf, &device->debug_nir); + NIR_PASS(_, nir, radv_nir_lower_printf, compiler_info->debug.debug_nir); if (options && options->lower_view_index_to_device_index) NIR_PASS(_, nir, nir_lower_view_index_to_device_index); NIR_PASS(_, nir, nir_lower_system_values); - if (pdev->info.gfx_level < GFX12 && nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) { + if (compiler_info->ac->gfx_level < GFX12 && nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) { nir_lower_compute_system_values_options csv_options = { .shuffle_local_ids_for_quad_derivatives = true, .lower_local_invocation_index = true, @@ -670,7 +659,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s bool lower_local_invocation_index = false; if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK || - (nir->info.stage == MESA_SHADER_MESH && pdev->info.mesh_fast_launch_2)) { + (nir->info.stage == MESA_SHADER_MESH && compiler_info->hw.mesh_fast_launch_2)) { lower_local_invocation_index = nir->info.derivative_group == DERIVATIVE_GROUP_QUADS || (((nir->info.workgroup_size[0] == 1) + (nir->info.workgroup_size[1] == 1) + (nir->info.workgroup_size[2] == 1)) == 2); @@ -680,7 +669,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s /* Mesh shaders run as NGG which can implement local_invocation_index from * the wave ID in merged_wave_info, but they don't have local_invocation_ids on GFX10.3. */ - .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !pdev->info.mesh_fast_launch_2, + .lower_cs_local_id_to_index = nir->info.stage == MESA_SHADER_MESH && !compiler_info->hw.mesh_fast_launch_2, .lower_local_invocation_index = lower_local_invocation_index, }; NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options); @@ -700,7 +689,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s NIR_PASS(_, nir, nir_opt_ray_queries); NIR_PASS(_, nir, nir_opt_ray_query_ranges); - NIR_PASS(_, nir, radv_nir_lower_ray_queries, device); + NIR_PASS(_, nir, radv_nir_lower_ray_queries, compiler_info); } nir_lower_tex_options tex_options = { @@ -708,12 +697,12 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s .lower_txf_offset = true, .lower_tg4_offsets = true, .lower_txs_cube_array = true, - .lower_to_fragment_fetch_amd = pdev->use_fmask, + .lower_to_fragment_fetch_amd = compiler_info->use_fmask, .lower_lod_zero_width = true, .lower_invalid_implicit_lod = true, - .lower_1d = pdev->info.gfx_level == GFX9, + .lower_1d = compiler_info->ac->gfx_level == GFX9, .optimize_txd = true, - .lower_tg4_shadow_to_16bit = pdev->info.compiler_info.has_fma_mix, + .lower_tg4_shadow_to_16bit = compiler_info->ac->has_fma_mix, }; NIR_PASS(_, nir, nir_lower_tex, &tex_options); @@ -737,8 +726,8 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s NIR_PASS(_, nir, nir_lower_global_vars_to_local); NIR_PASS(_, nir, nir_remove_dead_variables, nir_var_function_temp, NULL); - bool gfx7minus = pdev->info.gfx_level <= GFX7; - bool use_llvm = pdev->use_llvm; + bool gfx7minus = compiler_info->ac->gfx_level <= GFX7; + bool use_llvm = compiler_info->debug.use_llvm; NIR_PASS(_, nir, nir_lower_subgroups, &(struct nir_lower_subgroups_options){ @@ -762,7 +751,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct 
radv_shader_stage *s }); NIR_PASS(_, nir, nir_lower_load_const_to_scalar); - NIR_PASS(_, nir, nir_opt_shrink_stores, !pdev->cache_key.disable_shrink_image_store); + NIR_PASS(_, nir, nir_opt_shrink_stores, !compiler_info->cache_key->disable_shrink_image_store); if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_discard) NIR_PASS(_, nir, nir_lower_discard_if, nir_move_terminate_out_of_loops); @@ -861,7 +850,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s nir->info.stage == MESA_SHADER_MESH) && nir->info.outputs_written & VARYING_BIT_PRIMITIVE_SHADING_RATE) { /* Lower primitive shading rate to match HW requirements. */ - NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, pdev->info.gfx_level); + NIR_PASS(_, nir, radv_nir_lower_primitive_shading_rate, compiler_info->ac->gfx_level); } /* Indirect lowering must be called after the radv_optimize_nir() loop @@ -880,7 +869,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s } /* Lower immutable/embedded sampler derefs to vec4. */ - NIR_PASS(_, nir, radv_nir_lower_immediate_samplers, device, stage); + NIR_PASS(_, nir, radv_nir_lower_immediate_samplers, compiler_info, stage); progress = false; NIR_PASS(progress, nir, nir_vk_lower_ycbcr_tex, ycbcr_conversion_lookup, &stage->layout); @@ -888,8 +877,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s if (progress) nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - if (pdev->info.has_cs_regalloc_hang_bug && - mesa_shader_stage_is_compute(nir->info.stage)) { + if (compiler_info->hw.has_cs_regalloc_hang_bug && mesa_shader_stage_is_compute(nir->info.stage)) { const uint32_t wg_size = nir->info.workgroup_size[0] * nir->info.workgroup_size[1] * nir->info.workgroup_size[2]; @@ -908,7 +896,7 @@ radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *s } bool -radv_consider_culling(const struct radv_physical_device *pdev, struct nir_shader *nir, uint64_t ps_inputs_read, +radv_consider_culling(const struct radv_compiler_info *compiler_info, struct nir_shader *nir, uint64_t ps_inputs_read, unsigned num_vertices_per_primitive, const struct radv_shader_info *info) { /* Culling doesn't make sense for meta shaders. */ @@ -919,7 +907,7 @@ radv_consider_culling(const struct radv_physical_device *pdev, struct nir_shader if (info->vs.has_prolog) return false; - if (!pdev->cache_key.use_ngg_culling) + if (!compiler_info->cache_key->use_ngg_culling) return false; /* Shader based culling efficiency can depend on PS throughput. @@ -927,9 +915,9 @@ radv_consider_culling(const struct radv_physical_device *pdev, struct nir_shader */ unsigned max_ps_params = 8; - if (pdev->info.gfx_level >= GFX10_3 && pdev->info.has_dedicated_vram) + if (compiler_info->ac->gfx_level >= GFX10_3 && compiler_info->hw.has_dedicated_vram) max_ps_params = 12; /* GFX10.3 and newer discrete GPUs. 
*/ - else if (pdev->info.gfx_level == GFX10 && pdev->info.has_dedicated_vram) + else if (compiler_info->ac->gfx_level == GFX10 && compiler_info->hw.has_dedicated_vram) max_ps_params = 12; /* TODO: consider other heuristics here, such as PS execution time */ @@ -966,10 +954,9 @@ radv_consider_culling(const struct radv_physical_device *pdev, struct nir_shader } void -radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, +radv_lower_ngg(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *ngg_stage, const struct radv_graphics_state_key *gfx_state) { - const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_shader_info *info = &ngg_stage->info; nir_shader *nir = ngg_stage->nir; @@ -1010,7 +997,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, } ac_nir_lower_ngg_options options = {0}; - options.compiler_info = &pdev->info.compiler_info; + options.compiler_info = compiler_info->ac; options.max_workgroup_size = info->workgroup_size; options.wave_size = info->wave_size; options.export_clipdist_mask = info->outinfo.clip_dist_mask | info->outinfo.cull_dist_mask; @@ -1018,9 +1005,9 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, options.vs_output_param_offset = info->outinfo.vs_output_param_offset; options.has_param_exports = info->outinfo.param_exports || info->outinfo.prim_param_exports; options.can_cull = info->has_ngg_culling; - options.disable_streamout = !pdev->use_ngg_streamout; + options.disable_streamout = !compiler_info->use_ngg_streamout; options.has_xfb_prim_query = info->has_xfb_query; - options.has_gs_primitives_query = pdev->info.gfx_level < GFX11; + options.has_gs_primitives_query = compiler_info->ac->gfx_level < GFX11; options.force_vrs = info->force_vrs_per_vertex; options.skip_viewport_state_culling = nir->info.outputs_written & (VARYING_BIT_VIEWPORT | VARYING_BIT_VIEWPORT_MASK); @@ -1044,7 +1031,7 @@ radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, assert(info->is_ngg); options.has_gen_prim_query = info->has_prim_query; - options.has_ms_gs_invocations_query = pdev->info.gfx_level < GFX11; + options.has_ms_gs_invocations_query = compiler_info->ac->gfx_level < GFX11; NIR_PASS(_, nir, ac_nir_lower_ngg_gs, &options, &ngg_stage->info.ngg_lds_vertex_size, &ngg_stage->info.ngg_lds_scratch_size); @@ -2170,11 +2157,10 @@ radv_mem_ordered(enum amd_gfx_level gfx_level) } static bool -radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_binary *binary, +radv_postprocess_binary_config(const struct radv_compiler_info *compiler_info, struct radv_shader_binary *binary, const struct radv_shader_args *args) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); + enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level; struct ac_shader_config *config = &binary->config; if (binary->type == RADV_BINARY_TYPE_RTLD) { @@ -2183,17 +2169,17 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi #else struct ac_rtld_binary rtld_binary = {0}; - if (!radv_open_rtld_binary(pdev->info.gfx_level, binary, &rtld_binary)) { + if (!radv_open_rtld_binary(compiler_info->ac->gfx_level, binary, &rtld_binary)) { return false; } - if (!ac_rtld_read_config(&pdev->info.compiler_info, &rtld_binary, config)) { + if (!ac_rtld_read_config(compiler_info->ac, &rtld_binary, config)) { 
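/* even on failure, close the opened rtld binary before returning */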
ac_rtld_close(&rtld_binary); return false; } /* Calculate LDS allocation requirements. */ - config->lds_size = radv_calculate_lds_size(&binary->info, pdev->info.gfx_level); + config->lds_size = radv_calculate_lds_size(&binary->info, compiler_info->ac->gfx_level); ac_rtld_close(&rtld_binary); #endif @@ -2202,10 +2188,10 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi const struct radv_shader_info *info = &binary->info; mesa_shader_stage stage = binary->info.stage; bool scratch_enabled = config->scratch_bytes_per_wave > 0; - const bool trap_enabled = !!device->trap_handler_shader; + const bool trap_enabled = compiler_info->debug.trap_enabled; /* On GFX12, TRAP_PRESENT doesn't exist for compute shaders and it's enabled by default. */ - const enum ac_hw_stage hw_stage = radv_select_hw_stage(info, pdev->info.gfx_level); - const bool trap_present = trap_enabled && (pdev->info.gfx_level < GFX12 || hw_stage != AC_HW_COMPUTE_SHADER); + const enum ac_hw_stage hw_stage = radv_select_hw_stage(info, gfx_level); + const bool trap_present = trap_enabled && (gfx_level < GFX12 || hw_stage != AC_HW_COMPUTE_SHADER); unsigned vgpr_comp_cnt = 0; unsigned num_input_vgprs = args->ac.num_vgprs_used; @@ -2218,11 +2204,10 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi unsigned num_sgprs = MAX2(config->num_sgprs, args->ac.num_sgprs_used + 2 + 3); unsigned num_shared_vgprs = config->num_shared_vgprs; /* shared VGPRs are introduced in Navi and are allocated in blocks of 8 (RDNA ref 3.6.5) */ - assert((pdev->info.gfx_level >= GFX10 && num_shared_vgprs % 8 == 0) || - (pdev->info.gfx_level < GFX10 && num_shared_vgprs == 0)); + assert((gfx_level >= GFX10 && num_shared_vgprs % 8 == 0) || (gfx_level < GFX10 && num_shared_vgprs == 0)); unsigned num_shared_vgpr_blocks = num_shared_vgprs / 8; unsigned excp_en = 0, excp_en_msb = 0; - bool dx10_clamp = pdev->info.gfx_level < GFX11_7; + bool dx10_clamp = gfx_level < GFX11_7; config->num_vgprs = num_vgprs; config->num_sgprs = num_sgprs; @@ -2233,26 +2218,26 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi if (trap_enabled) { /* Configure the shader exceptions like memory violation, etc. */ - if (instance->trap_excp_flags & RADV_TRAP_EXCP_MEM_VIOL) { + if (compiler_info->debug.trap_excp_flags & RADV_TRAP_EXCP_MEM_VIOL) { excp_en |= 1 << 8; /* for the graphics stages */ excp_en_msb |= 1 << 1; /* for the compute stage */ } - if (instance->trap_excp_flags & RADV_TRAP_EXCP_FLOAT_DIV_BY_ZERO) + if (compiler_info->debug.trap_excp_flags & RADV_TRAP_EXCP_FLOAT_DIV_BY_ZERO) excp_en |= 1 << 2; - if (instance->trap_excp_flags & RADV_TRAP_EXCP_FLOAT_OVERFLOW) + if (compiler_info->debug.trap_excp_flags & RADV_TRAP_EXCP_FLOAT_OVERFLOW) excp_en |= 1 << 3; - if (instance->trap_excp_flags & RADV_TRAP_EXCP_FLOAT_UNDERFLOW) + if (compiler_info->debug.trap_excp_flags & RADV_TRAP_EXCP_FLOAT_UNDERFLOW) excp_en |= 1 << 4; - if (instance->trap_excp_flags & + if (compiler_info->debug.trap_excp_flags & (RADV_TRAP_EXCP_FLOAT_DIV_BY_ZERO | RADV_TRAP_EXCP_FLOAT_OVERFLOW | RADV_TRAP_EXCP_FLOAT_UNDERFLOW)) { /* It seems needed to disable DX10_CLAMP, otherwise the float exceptions aren't thrown. 
*/ dx10_clamp = false; } } - if (pdev->info.gfx_level <= GFX10_3 && !pdev->use_ngg_streamout) { + if (gfx_level <= GFX10_3 && !compiler_info->use_ngg_streamout) { config->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) | S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) | S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) | S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) | S_00B12C_SO_EN(!!info->so.enabled_stream_buffers_mask); @@ -2261,7 +2246,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi config->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) / (info->wave_size == 32 ? 8 : 4)) | S_00B848_DX10_CLAMP(dx10_clamp) | S_00B848_FLOAT_MODE(config->float_mode); - if (pdev->info.gfx_level >= GFX10) { + if (gfx_level >= GFX10) { config->rsrc2 |= S_00B22C_USER_SGPR_MSB_GFX10(args->num_user_sgprs >> 5); } else { config->rsrc1 |= S_00B228_SGPRS((num_sgprs - 1) / 8); @@ -2269,7 +2254,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi } mesa_shader_stage es_stage = MESA_SHADER_NONE; - if (pdev->info.gfx_level >= GFX9) { + if (gfx_level >= GFX9) { es_stage = stage == MESA_SHADER_GEOMETRY ? info->gs.es_type : stage; } @@ -2286,17 +2271,17 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi } } - assert(config->lds_size <= pdev->info.lds_size_per_workgroup); - unsigned lds_alloc = ac_shader_encode_lds_size(config->lds_size, pdev->info.gfx_level, stage); + assert(config->lds_size <= compiler_info->hw.lds_size_per_workgroup); + unsigned lds_alloc = ac_shader_encode_lds_size(config->lds_size, gfx_level, stage); switch (stage) { case MESA_SHADER_TESS_EVAL: if (info->is_ngg) { - if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX11_7) - config->rsrc1 |= S_00B228_MEM_ORDERED(radv_mem_ordered(pdev->info.gfx_level)); + if (gfx_level >= GFX10 && gfx_level <= GFX11_7) + config->rsrc1 |= S_00B228_MEM_ORDERED(radv_mem_ordered(gfx_level)); config->rsrc2 |= S_00B22C_OC_LDS_EN(1) | S_00B22C_EXCP_EN(excp_en); } else if (info->tes.as_es) { - assert(pdev->info.gfx_level <= GFX8); + assert(gfx_level <= GFX8); vgpr_comp_cnt = info->uses_prim_id ? 3 : 2; config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en); @@ -2304,22 +2289,22 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi bool enable_prim_id = info->outinfo.export_prim_id || info->uses_prim_id; vgpr_comp_cnt = enable_prim_id ? 3 : 2; - if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX11_7) - config->rsrc1 |= S_00B128_MEM_ORDERED(radv_mem_ordered(pdev->info.gfx_level)); + if (gfx_level >= GFX10 && gfx_level <= GFX11_7) + config->rsrc1 |= S_00B128_MEM_ORDERED(radv_mem_ordered(gfx_level)); config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en); } config->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; case MESA_SHADER_TESS_CTRL: - if (pdev->info.gfx_level >= GFX9) { + if (gfx_level >= GFX9) { /* We need at least 2 components for LS. * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. */ - if (pdev->info.gfx_level >= GFX10) { + if (gfx_level >= GFX10) { if (info->vs.needs_instance_id) { - vgpr_comp_cnt = pdev->info.gfx_level >= GFX12 ? 1 : 3; - } else if (pdev->info.gfx_level <= GFX10_3) { + vgpr_comp_cnt = gfx_level >= GFX12 ? 
1 : 3; + } else if (gfx_level <= GFX10_3) { vgpr_comp_cnt = 1; } config->rsrc2 |= S_00B42C_EXCP_EN_GFX6(excp_en); @@ -2330,17 +2315,17 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi } else { config->rsrc2 |= S_00B12C_OC_LDS_EN(1) | S_00B12C_EXCP_EN(excp_en); } - if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX11_7) - config->rsrc1 |= S_00B428_MEM_ORDERED(radv_mem_ordered(pdev->info.gfx_level)); + if (gfx_level >= GFX10 && gfx_level <= GFX11_7) + config->rsrc1 |= S_00B428_MEM_ORDERED(radv_mem_ordered(gfx_level)); config->rsrc1 |= S_00B428_WGP_MODE(config->wgp_mode); config->rsrc2 |= S_00B42C_SHARED_VGPR_CNT(num_shared_vgpr_blocks); break; case MESA_SHADER_VERTEX: if (info->is_ngg) { - if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX11_7) - config->rsrc1 |= S_00B228_MEM_ORDERED(radv_mem_ordered(pdev->info.gfx_level)); + if (gfx_level >= GFX10 && gfx_level <= GFX11_7) + config->rsrc1 |= S_00B228_MEM_ORDERED(radv_mem_ordered(gfx_level)); } else if (info->vs.as_ls) { - assert(pdev->info.gfx_level <= GFX8); + assert(gfx_level <= GFX8); /* We need at least 2 components for LS. * VGPR0-3: (VertexID, RelAutoindex, InstanceID / StepRate0, InstanceID). * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. @@ -2349,7 +2334,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi */ vgpr_comp_cnt = info->vs.needs_instance_id ? 2 : 1; } else if (info->vs.as_es) { - assert(pdev->info.gfx_level <= GFX8); + assert(gfx_level <= GFX8); /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ vgpr_comp_cnt = info->vs.needs_instance_id ? 1 : 0; } else { @@ -2357,7 +2342,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi * If PrimID is disabled. InstanceID / StepRate1 is loaded instead. * StepRate0 is set to 1. so that VGPR3 doesn't have to be loaded. 
*/ - if (info->vs.needs_instance_id && pdev->info.gfx_level >= GFX10) { + if (info->vs.needs_instance_id && gfx_level >= GFX10) { vgpr_comp_cnt = 3; } else if (info->outinfo.export_prim_id) { vgpr_comp_cnt = 2; @@ -2367,26 +2352,26 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi vgpr_comp_cnt = 0; } - if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX11_7) - config->rsrc1 |= S_00B128_MEM_ORDERED(radv_mem_ordered(pdev->info.gfx_level)); + if (gfx_level >= GFX10 && gfx_level <= GFX11_7) + config->rsrc1 |= S_00B128_MEM_ORDERED(radv_mem_ordered(gfx_level)); } config->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B12C_EXCP_EN(excp_en); break; case MESA_SHADER_MESH: - if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX11_7) - config->rsrc1 |= S_00B228_MEM_ORDERED(radv_mem_ordered(pdev->info.gfx_level)); + if (gfx_level >= GFX10 && gfx_level <= GFX11_7) + config->rsrc1 |= S_00B228_MEM_ORDERED(radv_mem_ordered(gfx_level)); config->rsrc2 |= S_00B12C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B12C_EXCP_EN(excp_en); break; case MESA_SHADER_FRAGMENT: - if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX11_7) - config->rsrc1 |= S_00B028_MEM_ORDERED(radv_mem_ordered(pdev->info.gfx_level)); + if (gfx_level >= GFX10 && gfx_level <= GFX11_7) + config->rsrc1 |= S_00B028_MEM_ORDERED(radv_mem_ordered(gfx_level)); config->rsrc1 |= S_00B028_LOAD_PROVOKING_VTX(info->ps.load_provoking_vtx); config->rsrc2 |= S_00B02C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B02C_EXCP_EN(excp_en) | - S_00B02C_LOAD_COLLISION_WAVEID(info->ps.pops && pdev->info.gfx_level < GFX11); + S_00B02C_LOAD_COLLISION_WAVEID(info->ps.pops && gfx_level < GFX11); break; case MESA_SHADER_GEOMETRY: - if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX11_7) - config->rsrc1 |= S_00B228_MEM_ORDERED(radv_mem_ordered(pdev->info.gfx_level)); + if (gfx_level >= GFX10 && gfx_level <= GFX11_7) + config->rsrc1 |= S_00B228_MEM_ORDERED(radv_mem_ordered(gfx_level)); config->rsrc2 |= S_00B22C_SHARED_VGPR_CNT(num_shared_vgpr_blocks) | S_00B22C_EXCP_EN(excp_en); break; case MESA_SHADER_RAYGEN: @@ -2397,8 +2382,8 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi case MESA_SHADER_ANY_HIT: case MESA_SHADER_COMPUTE: case MESA_SHADER_TASK: - if (pdev->info.gfx_level >= GFX10 && pdev->info.gfx_level <= GFX11_7) - config->rsrc1 |= S_00B848_MEM_ORDERED(radv_mem_ordered(pdev->info.gfx_level)); + if (gfx_level >= GFX10 && gfx_level <= GFX11_7) + config->rsrc1 |= S_00B848_MEM_ORDERED(radv_mem_ordered(gfx_level)); config->rsrc1 |= S_00B848_WGP_MODE(config->wgp_mode); config->rsrc2 |= S_00B84C_TGID_X_EN(info->cs.uses_block_id[0]) | S_00B84C_TGID_Y_EN(info->cs.uses_block_id[1]) | S_00B84C_TGID_Z_EN(info->cs.uses_block_id[2]) | @@ -2415,7 +2400,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi break; } - if (pdev->info.gfx_level >= GFX10 && info->is_ngg && + if (gfx_level >= GFX10 && info->is_ngg && (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_MESH)) { unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt; @@ -2426,7 +2411,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi */ if (es_stage == MESA_SHADER_VERTEX) { if (info->vs.needs_instance_id) { - es_vgpr_comp_cnt = pdev->info.gfx_level >= GFX12 ? 1 : 3; + es_vgpr_comp_cnt = gfx_level >= GFX12 ? 
1 : 3; } else { es_vgpr_comp_cnt = 0; } @@ -2439,10 +2424,10 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi UNREACHABLE("Unexpected ES shader stage"); } - if (stage == MESA_SHADER_MESH && pdev->info.mesh_fast_launch_2) { + if (stage == MESA_SHADER_MESH && compiler_info->hw.mesh_fast_launch_2) { /* Only VGPR0 is used for X/Y/Z local invocation ID */ gs_vgpr_comp_cnt = 0; - } else if (pdev->info.gfx_level >= GFX12) { + } else if (gfx_level >= GFX12) { if (info->gs.vertices_in >= 4) { gs_vgpr_comp_cnt = 2; /* VGPR2 contains offsets 3-5 */ } else if (info->uses_prim_id || @@ -2483,13 +2468,13 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(config->wgp_mode); config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_LDS_SIZE(lds_alloc) | S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL); - } else if (pdev->info.gfx_level >= GFX9 && stage == MESA_SHADER_GEOMETRY) { + } else if (gfx_level >= GFX9 && stage == MESA_SHADER_GEOMETRY) { unsigned gs_vgpr_comp_cnt, es_vgpr_comp_cnt; if (es_stage == MESA_SHADER_VERTEX) { /* VGPR0-3: (VertexID, InstanceID / StepRate0, ...) */ if (info->vs.needs_instance_id) { - es_vgpr_comp_cnt = pdev->info.gfx_level >= GFX10 ? 3 : 1; + es_vgpr_comp_cnt = gfx_level >= GFX10 ? 3 : 1; } else { es_vgpr_comp_cnt = 0; } @@ -2515,7 +2500,7 @@ radv_postprocess_binary_config(struct radv_device *device, struct radv_shader_bi config->rsrc1 |= S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt) | S_00B228_WGP_MODE(config->wgp_mode); config->rsrc2 |= S_00B22C_ES_VGPR_COMP_CNT(es_vgpr_comp_cnt) | S_00B22C_OC_LDS_EN(es_stage == MESA_SHADER_TESS_EVAL); - } else if (pdev->info.gfx_level >= GFX9 && stage == MESA_SHADER_TESS_CTRL) { + } else if (gfx_level >= GFX9 && stage == MESA_SHADER_TESS_CTRL) { config->rsrc1 |= S_00B428_LS_VGPR_COMP_CNT(vgpr_comp_cnt); } else { config->rsrc1 |= S_00B128_VGPR_COMP_CNT(vgpr_comp_cnt); @@ -2916,18 +2901,17 @@ radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader } VkResult -radv_parse_binary_debug_info(struct radv_device *device, const struct radv_shader_binary *binary, +radv_parse_binary_debug_info(const struct radv_compiler_info *compiler_info, const struct radv_shader_binary *binary, struct radv_shader_debug_info *dbg) { if (binary->type == RADV_BINARY_TYPE_RTLD) { #if !defined(USE_LIBELF) return VK_SUCCESS; #else - const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_shader_binary_rtld *bin = (struct radv_shader_binary_rtld *)binary; struct ac_rtld_binary rtld_binary = {0}; - if (!radv_open_rtld_binary(pdev->info.gfx_level, binary, &rtld_binary)) + if (!radv_open_rtld_binary(compiler_info->ac->gfx_level, binary, &rtld_binary)) return VK_ERROR_OUT_OF_HOST_MEMORY; const char *disasm_data; @@ -3234,9 +3218,9 @@ radv_gather_nir_debug_info(struct nir_shader *const *shaders, int shader_count) } char * -radv_dump_nir_shaders(const struct radv_instance *instance, struct nir_shader *const *shaders, int shader_count) +radv_dump_nir_shaders(const struct radv_compiler_info *compiler_info, struct nir_shader *const *shaders, int shader_count) { - if (instance->debug_flags & RADV_DEBUG_NIR_DEBUG_INFO) + if (compiler_info->debug.nir_debug_info) return radv_gather_nir_debug_info(shaders, shader_count); char *data = NULL; @@ -3314,28 +3298,25 @@ radv_aco_build_shader_binary(void **bin, const struct ac_shader_config *config, } static void 
-radv_fill_nir_compiler_options(struct radv_nir_compiler_options *options, struct radv_device *device, +radv_fill_nir_compiler_options(const struct radv_compiler_info *compiler_info, + struct radv_nir_compiler_options *options, const struct radv_graphics_state_key *gfx_state, bool should_use_wgp, bool can_dump_shader, bool keep_shader_info, bool keep_statistic_info) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); - - options->compiler_info = &pdev->info.compiler_info; - options->gfx_level = pdev->info.gfx_level; - options->family = pdev->info.family; - options->address32_hi = pdev->info.address32_hi; + options->compiler_info = compiler_info->ac; + options->gfx_level = compiler_info->ac->gfx_level; + options->family = compiler_info->hw.family; + options->address32_hi = compiler_info->hw.address32_hi; /* robust_buffer_access_llvm here used by LLVM only, pipeline robustness is not exposed there. */ - options->robust_buffer_access_llvm = - (device->vk.enabled_features.robustBufferAccess2 || device->vk.enabled_features.robustBufferAccess); + options->robust_buffer_access_llvm = compiler_info->robust_buffer_access; options->wgp_mode = should_use_wgp; options->dump_shader = can_dump_shader; - options->dump_ir = options->dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_BACKEND_IR); - options->dump_preoptir = options->dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_PREOPT_IR); + options->dump_ir = options->dump_shader && compiler_info->debug.dump_backend_ir; + options->dump_preoptir = options->dump_shader && compiler_info->debug.dump_preopt_ir; options->record_asm = keep_shader_info || options->dump_shader; options->record_ir = keep_shader_info; options->record_stats = keep_statistic_info; - options->check_ir = instance->debug_flags & RADV_DEBUG_CHECKIR; + options->check_ir = compiler_info->debug.check_ir; options->enable_mrt_output_nan_fixup = gfx_state ? 
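/* callers with no graphics state (trap handler, prologs, epilogs below) pass NULL and get the fixup disabled */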
gfx_state->ps.epilog.enable_mrt_output_nan_fixup : false; } @@ -3354,18 +3335,17 @@ radv_set_stage_key_robustness(const struct vk_pipeline_robustness_state *rs, mes } static struct radv_shader_binary * -shader_compile(struct radv_device *device, struct nir_shader *const *shaders, int shader_count, mesa_shader_stage stage, - const struct radv_shader_info *info, const struct radv_shader_args *args, +shader_compile(const struct radv_compiler_info *compiler_info, struct nir_shader *const *shaders, int shader_count, + mesa_shader_stage stage, const struct radv_shader_info *info, const struct radv_shader_args *args, const struct radv_shader_stage_key *stage_key, struct radv_nir_compiler_options *options) { - const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_shader_binary *binary = NULL; #if AMD_LLVM_AVAILABLE - if (pdev->use_llvm || options->dump_shader || options->record_ir) + if (compiler_info->debug.use_llvm || options->dump_shader || options->record_ir) ac_init_llvm_once(); - if (pdev->use_llvm) { + if (compiler_info->debug.use_llvm) { llvm_compile_shader(options, info, shader_count, shaders, &binary, args); #else if (false) { @@ -3374,14 +3354,14 @@ shader_compile(struct radv_device *device, struct nir_shader *const *shaders, in struct aco_shader_info ac_info; struct aco_compiler_options ac_opts; radv_aco_convert_opts(&ac_opts, options, args, stage_key); - radv_aco_convert_shader_info(&ac_info, info, args, &device->cache_key, pdev->info.gfx_level); + radv_aco_convert_shader_info(&ac_info, info, args, compiler_info); aco_compile_shader(&ac_opts, &ac_info, shader_count, shaders, &args->ac, &radv_aco_build_shader_binary, (void **)&binary); } binary->info = *info; - if (!radv_postprocess_binary_config(device, binary, args)) { + if (!radv_postprocess_binary_config(compiler_info, binary, args)) { free(binary); return NULL; } @@ -3390,38 +3370,34 @@ shader_compile(struct radv_device *device, struct nir_shader *const *shaders, in } struct radv_shader_binary * -radv_shader_nir_to_asm(struct radv_device *device, struct radv_shader_stage *pl_stage, +radv_shader_nir_to_asm(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *pl_stage, struct nir_shader *const *shaders, int shader_count, const struct radv_graphics_state_key *gfx_state, bool keep_shader_info, bool keep_statistic_info) { - const struct radv_physical_device *pdev = radv_device_physical(device); mesa_shader_stage stage = shaders[shader_count - 1]->info.stage; struct radv_shader_info *info = &pl_stage->info; bool dump_shader = false; for (unsigned i = 0; i < shader_count; ++i) - dump_shader |= radv_can_dump_shader(device, shaders[i]); + dump_shader |= radv_can_dump_shader(compiler_info, shaders[i]); struct radv_nir_compiler_options options = {0}; - radv_fill_nir_compiler_options(&options, device, gfx_state, - radv_should_use_wgp_mode(pdev->info.gfx_level, stage, info), dump_shader, + radv_fill_nir_compiler_options(compiler_info, &options, gfx_state, + radv_should_use_wgp_mode(compiler_info->ac->gfx_level, stage, info), dump_shader, keep_shader_info, keep_statistic_info); struct radv_shader_binary *binary = - shader_compile(device, shaders, shader_count, stage, info, &pl_stage->args, &pl_stage->key, &options); + shader_compile(compiler_info, shaders, shader_count, stage, info, &pl_stage->args, &pl_stage->key, &options); return binary; } void -radv_shader_dump_asm(struct radv_device *device, const struct radv_shader_debug_info *debug, +radv_shader_dump_asm(const struct radv_compiler_info 
*compiler_info, const struct radv_shader_debug_info *debug, const struct radv_shader_info *info) { if (debug->dump_shader) { - const struct radv_physical_device *pdev = radv_device_physical(device); - const struct radv_instance *instance = radv_physical_device_instance(pdev); - - if (instance->debug_flags & RADV_DEBUG_DUMP_ASM) { + if (compiler_info->debug.dump_asm) { const char *sep = ""; u_foreach_bit (stage, debug->stages) { fprintf(stderr, "%s%s", sep, radv_get_shader_name(info, stage)); @@ -3444,8 +3420,9 @@ radv_create_trap_handler_shader(struct radv_device *device) struct radv_nir_compiler_options options = {0}; const bool dump_shader = !!(instance->debug_flags & RADV_DEBUG_DUMP_TRAP_HANDLER); - radv_fill_nir_compiler_options(&options, device, NULL, radv_should_use_wgp_mode(pdev->info.gfx_level, stage, &info), - dump_shader, false, false); + radv_fill_nir_compiler_options(&device->compiler_info, &options, NULL, + radv_should_use_wgp_mode(pdev->info.gfx_level, stage, &info), dump_shader, false, + false); nir_builder b = radv_meta_nir_init_shader(stage, "meta_trap_handler"); @@ -3456,7 +3433,7 @@ radv_create_trap_handler_shader(struct radv_device *device) struct radv_shader_args args; struct radv_shader_debug_info debug = {0}; - radv_declare_shader_args(device, NULL, &info, stage, MESA_SHADER_NONE, &args, &debug); + radv_declare_shader_args(&device->compiler_info, NULL, &info, stage, MESA_SHADER_NONE, &args, &debug); #if AMD_LLVM_AVAILABLE if (options.dump_shader || options.record_ir) @@ -3467,17 +3444,17 @@ radv_create_trap_handler_shader(struct radv_device *device) struct aco_compiler_options ac_opts; struct aco_shader_info ac_info; - radv_aco_convert_shader_info(&ac_info, &info, &args, &device->cache_key, pdev->info.gfx_level); + radv_aco_convert_shader_info(&ac_info, &info, &args, &device->compiler_info); radv_aco_convert_opts(&ac_opts, &options, &args, &stage_key); aco_compile_trap_handler(&ac_opts, &ac_info, &args.ac, &radv_aco_build_shader_binary, (void **)&binary); binary->info = info; - radv_postprocess_binary_config(device, binary, &args); + radv_postprocess_binary_config(&device->compiler_info, binary, &args); struct radv_shader *shader; radv_shader_create_uncached(device, binary, false, NULL, &debug, &shader); - radv_parse_binary_debug_info(device, binary, &shader->dbg); + radv_parse_binary_debug_info(&device->compiler_info, binary, &shader->dbg); if (options.dump_shader) { fprintf(stderr, "Trap handler"); @@ -3518,19 +3495,20 @@ struct radv_shader * radv_compile_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage, struct radv_shader_debug_info *debug) { + const struct radv_compiler_info *compiler_info = &device->compiler_info; const struct radv_physical_device *pdev = radv_device_physical(device); struct radv_instance *instance = radv_physical_device_instance(pdev); struct radv_shader *prolog; struct radv_nir_compiler_options options = {0}; - radv_fill_nir_compiler_options(&options, device, NULL, false, instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS, + radv_fill_nir_compiler_options(compiler_info, &options, NULL, false, instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS, radv_device_fault_detection_enabled(device), false); if (options.dump_shader) { - simple_mtx_lock(&instance->shader_dump_mtx); + simple_mtx_lock(compiler_info->debug.shader_dump_mtx); - if (instance->debug_flags & RADV_DEBUG_DUMP_NIR) + if (compiler_info->debug.dump_nir) nir_print_shader(stage->nir, stderr); } @@ -3543,21 +3521,21 @@ radv_compile_rt_prolog(struct radv_device *device, 
struct radv_shader_stage *sta struct radv_shader_stage_key stage_key = {0}; struct aco_shader_info ac_info; struct aco_compiler_options ac_opts; - radv_aco_convert_shader_info(&ac_info, &stage->info, &stage->args, &device->cache_key, pdev->info.gfx_level); + radv_aco_convert_shader_info(&ac_info, &stage->info, &stage->args, compiler_info); radv_aco_convert_opts(&ac_opts, &options, &stage->args, &stage_key); aco_compile_shader(&ac_opts, &ac_info, 1, &stage->nir, &stage->args.ac, &radv_aco_build_shader_binary, (void **)&binary); binary->info = stage->info; - radv_postprocess_binary_config(device, binary, &stage->args); + radv_postprocess_binary_config(compiler_info, binary, &stage->args); radv_shader_create_uncached(device, binary, false, NULL, debug, &prolog); - if (!prolog || radv_parse_binary_debug_info(device, binary, &prolog->dbg) != VK_SUCCESS) + if (!prolog || radv_parse_binary_debug_info(compiler_info, binary, &prolog->dbg) != VK_SUCCESS) goto done; if (options.dump_shader) { fprintf(stderr, "Raytracing prolog"); fprintf(stderr, "\ndisasm:\n%s\n", prolog->dbg.disasm_string); - simple_mtx_unlock(&instance->shader_dump_mtx); + simple_mtx_unlock(compiler_info->debug.shader_dump_mtx); } done: @@ -3568,12 +3546,13 @@ done: struct radv_shader_part * radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_key *key) { + const struct radv_compiler_info *compiler_info = &device->compiler_info; const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_instance *instance = radv_physical_device_instance(pdev); struct radv_shader_part *prolog; struct radv_shader_args args = {0}; struct radv_nir_compiler_options options = {0}; - radv_fill_nir_compiler_options(&options, device, NULL, false, instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS, + radv_fill_nir_compiler_options(compiler_info, &options, NULL, false, instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS, radv_device_fault_detection_enabled(device), false); struct radv_shader_info info = {0}; @@ -3590,7 +3569,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke struct radv_graphics_state_key gfx_state = {0}; - radv_declare_shader_args(device, &gfx_state, &info, key->next_stage, + radv_declare_shader_args(compiler_info, &gfx_state, &info, key->next_stage, key->next_stage != MESA_SHADER_VERTEX ? 
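/* a merged next stage (TCS or GS) has the VS as its previous stage; a standalone VS has none */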
MESA_SHADER_VERTEX : MESA_SHADER_NONE, &args, NULL); info.user_sgprs_locs = args.user_sgprs_locs; @@ -3606,7 +3585,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke struct aco_shader_info ac_info; struct aco_vs_prolog_info ac_prolog_info; struct aco_compiler_options ac_opts; - radv_aco_convert_shader_info(&ac_info, &info, &args, &device->cache_key, pdev->info.gfx_level); + radv_aco_convert_shader_info(&ac_info, &info, &args, compiler_info); radv_aco_convert_opts(&ac_opts, &options, &args, &stage_key); radv_aco_convert_vs_prolog_key(&ac_prolog_info, key, &args); aco_compile_vs_prolog(&ac_opts, &ac_info, &ac_prolog_info, &args.ac, &radv_aco_build_shader_part, (void **)&binary); @@ -3636,12 +3615,13 @@ struct radv_shader_part * radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_key *key, struct radv_shader_part_binary **binary_out) { + const struct radv_compiler_info *compiler_info = &device->compiler_info; const struct radv_physical_device *pdev = radv_device_physical(device); const struct radv_instance *instance = radv_physical_device_instance(pdev); struct radv_shader_part *epilog; struct radv_shader_args args = {0}; struct radv_nir_compiler_options options = {0}; - radv_fill_nir_compiler_options(&options, device, NULL, false, instance->debug_flags & RADV_DEBUG_DUMP_EPILOGS, + radv_fill_nir_compiler_options(compiler_info, &options, NULL, false, instance->debug_flags & RADV_DEBUG_DUMP_EPILOGS, radv_device_fault_detection_enabled(device), false); struct radv_shader_info info = {0}; @@ -3649,7 +3629,7 @@ radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_ke info.wave_size = pdev->ps_wave_size; info.workgroup_size = 64; - radv_declare_ps_epilog_args(device, key, &args); + radv_declare_ps_epilog_args(compiler_info, key, &args); #if AMD_LLVM_AVAILABLE if (options.dump_shader || options.record_ir) @@ -3661,7 +3641,7 @@ radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_ke struct aco_shader_info ac_info; struct aco_ps_epilog_info ac_epilog_info = {0}; struct aco_compiler_options ac_opts; - radv_aco_convert_shader_info(&ac_info, &info, &args, &device->cache_key, pdev->info.gfx_level); + radv_aco_convert_shader_info(&ac_info, &info, &args, compiler_info); radv_aco_convert_opts(&ac_opts, &options, &args, &stage_key); radv_aco_convert_ps_epilog_key(&ac_epilog_info, key, &args); aco_compile_ps_epilog(&ac_opts, &ac_info, &ac_epilog_info, &args.ac, &radv_aco_build_shader_part, (void **)&binary); @@ -3878,13 +3858,13 @@ radv_get_user_sgpr(const struct radv_shader *shader, int idx) } void -radv_get_tess_wg_info(const struct radv_physical_device *pdev, const ac_nir_tess_io_info *io_info, +radv_get_tess_wg_info(const struct radv_compiler_info *compiler_info, const ac_nir_tess_io_info *io_info, unsigned tcs_vertices_out, unsigned tcs_num_input_vertices, unsigned tcs_num_lds_inputs, unsigned *num_patches_per_wg, unsigned *lds_size) { const uint32_t lds_input_vertex_size = get_tcs_input_vertex_stride(tcs_num_lds_inputs); - ac_nir_compute_tess_wg_info(&pdev->info.compiler_info, io_info, tcs_vertices_out, pdev->ge_wave_size, false, + ac_nir_compute_tess_wg_info(compiler_info->ac, io_info, tcs_vertices_out, compiler_info->ge_wave_size, false, tcs_num_input_vertices, lds_input_vertex_size, 0, num_patches_per_wg, lds_size); } diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h index 28f37faa6bd..353c6b9469e 100644 --- a/src/amd/vulkan/radv_shader.h +++ 
b/src/amd/vulkan/radv_shader.h @@ -607,7 +607,7 @@ void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets, bool opt_ struct radv_shader_stage; -nir_shader *radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *stage, +nir_shader *radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage, const struct radv_spirv_to_nir_options *options, bool is_internal); void radv_init_shader_arenas(struct radv_device *device); @@ -617,23 +617,22 @@ void radv_destroy_shader_upload_queue(struct radv_device *device); struct radv_shader_args; -VkResult radv_parse_binary_debug_info(struct radv_device *device, const struct radv_shader_binary *binary, +VkResult radv_parse_binary_debug_info(const struct radv_compiler_info *compiler_info, const struct radv_shader_binary *binary, struct radv_shader_debug_info *dbg); VkResult radv_shader_create_uncached(struct radv_device *device, const struct radv_shader_binary *binary, bool replayable, struct radv_serialized_shader_arena_block *replay_block, struct radv_shader_debug_info *dbg, struct radv_shader **out_shader); -struct radv_shader_binary *radv_shader_nir_to_asm(struct radv_device *device, struct radv_shader_stage *pl_stage, - struct nir_shader *const *shaders, int shader_count, - const struct radv_graphics_state_key *gfx_state, +struct radv_shader_binary *radv_shader_nir_to_asm(const struct radv_compiler_info *compiler_info, + struct radv_shader_stage *pl_stage, struct nir_shader *const *shaders, + int shader_count, const struct radv_graphics_state_key *gfx_state, bool keep_shader_info, bool keep_statistic_info); -void radv_shader_dump_asm(struct radv_device *device, const struct radv_shader_debug_info *debug, +void radv_shader_dump_asm(const struct radv_compiler_info *compiler_info, const struct radv_shader_debug_info *debug, const struct radv_shader_info *info); -struct radv_instance; -char *radv_dump_nir_shaders(const struct radv_instance *instance, struct nir_shader *const *shaders, int shader_count); +char *radv_dump_nir_shaders(const struct radv_compiler_info *compiler_info, struct nir_shader *const *shaders, int shader_count); VkResult radv_shader_wait_for_upload(struct radv_device *device, uint64_t seq); @@ -692,9 +691,9 @@ unsigned radv_compute_spi_ps_input(enum amd_gfx_level gfx_level, const struct ra bool radv_is_traversal_shader(nir_shader *nir); -bool radv_can_dump_shader(struct radv_device *device, nir_shader *nir); +bool radv_can_dump_shader(const struct radv_compiler_info *compiler_info, nir_shader *nir); -bool radv_can_dump_shader_stats(struct radv_device *device, nir_shader *nir); +bool radv_can_dump_shader_stats(const struct radv_compiler_info *compiler_info, nir_shader *nir); VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader, FILE *output); @@ -751,15 +750,16 @@ get_tcs_input_vertex_stride(unsigned tcs_num_inputs) return stride; } -void radv_get_tess_wg_info(const struct radv_physical_device *pdev, const ac_nir_tess_io_info *io_info, +void radv_get_tess_wg_info(const struct radv_compiler_info *compiler_info, const ac_nir_tess_io_info *io_info, unsigned tcs_vertices_out, unsigned tcs_num_input_vertices, unsigned tcs_num_lds_inputs, unsigned *num_patches_per_wg, unsigned *lds_size); -void radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage, +void radv_lower_ngg(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *ngg_stage, const struct 
 
-bool radv_consider_culling(const struct radv_physical_device *pdev, struct nir_shader *nir, uint64_t ps_inputs_read,
-                           unsigned num_vertices_per_primitive, const struct radv_shader_info *info);
+bool radv_consider_culling(const struct radv_compiler_info *compiler_info, struct nir_shader *nir,
+                           uint64_t ps_inputs_read, unsigned num_vertices_per_primitive,
+                           const struct radv_shader_info *info);
 
 void radv_get_nir_options(struct radv_physical_device *pdev);
 
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c
index 45920a0ebd2..c5202d2ac75 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -10,8 +10,6 @@
  */
 
 #include "radv_shader_args.h"
-#include "radv_device.h"
-#include "radv_physical_device.h"
 #include "radv_shader.h"
 
 #include "util/memstream.h"
@@ -322,11 +320,9 @@ declare_ms_input_sgprs(struct radv_shader_args_state *state, const struct radv_s
 }
 
 static void
-declare_ms_input_vgprs(struct radv_shader_args_state *state, const struct radv_device *device)
+declare_ms_input_vgprs(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
-   if (pdev->info.mesh_fast_launch_2) {
+   if (compiler_info->hw.mesh_fast_launch_2) {
       RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_ids_packed);
    } else {
       RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.vertex_id);
@@ -378,13 +374,13 @@ declare_ngg_sgprs(struct radv_shader_args_state *state, const struct radv_shader
 }
 
 static void
-radv_init_shader_args(struct radv_shader_args_state *state, const struct radv_device *device, mesa_shader_stage stage)
+radv_init_shader_args(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state,
+                      mesa_shader_stage stage)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
    memset(state->args, 0, sizeof(*state->args));
 
-   state->args->explicit_scratch_args = !pdev->use_llvm;
-   state->args->remap_spi_ps_input = !pdev->use_llvm;
+   state->args->explicit_scratch_args = !compiler_info->debug.use_llvm;
+   state->args->remap_spi_ps_input = !compiler_info->debug.use_llvm;
 
    for (int i = 0; i < MAX_SETS; i++)
       state->args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
@@ -557,12 +553,11 @@ declare_unmerged_vs_tes_gs_args(struct radv_shader_args_state *state, const enum
 }
 
 static void
-declare_shader_args(struct radv_shader_args_state *state, const struct radv_device *device,
+declare_shader_args(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state,
                     const struct radv_graphics_state_key *gfx_state, const struct radv_shader_info *info,
                     mesa_shader_stage stage, mesa_shader_stage previous_stage, struct user_sgpr_info *user_sgpr_info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
+   const enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
    bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
                            (stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) ||
                            (stage == MESA_SHADER_MESH && info->ms.has_query) ||
@@ -590,7 +585,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
       }
    }
 
-   radv_init_shader_args(state, device, stage);
+   radv_init_shader_args(compiler_info, state, stage);
 
    if (mesa_shader_stage_is_rt(stage)) {
       return;
    }
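
Review note: worth pausing on what the converted helpers above imply about the new context struct. They read compiler_info->ac->gfx_level, compiler_info->hw.mesh_fast_launch_2 and compiler_info->debug.use_llvm, so the struct groups the shared AMD compiler description, a few hardware facts, and debug toggles. Its actual definition is not in these hunks; the sketch below is inferred purely from the fields this series touches, and the name, the field grouping, and the pointee type names are assumptions:

   struct radv_compiler_info_sketch {
      /* Shared AMD compiler description, formerly reached as
       * &pdev->info.compiler_info; the pointee type name is assumed. */
      const struct ac_gpu_compiler_info *ac;

      struct {
         uint32_t address32_hi;
         bool mesh_fast_launch_2;
      } hw;

      struct {
         bool use_llvm;
         bool keep_shader_info;
      } debug;

      /* Codegen-relevant cache-key flags (disable_trunc_coord, ...); type name assumed. */
      const struct radv_physical_device_cache_key *cache_key;

      unsigned ge_wave_size;
      bool load_grid_size_from_user_sgpr;
      bool emulate_ngg_gs_query_pipeline_stat;
      bool primitives_generated_query;
      bool mesh_shader_queries;
   };
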
@@ -617,7 +612,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
       declare_global_input_sgprs(state, gfx_level, info, user_sgpr_info);
 
       if (info->cs.uses_grid_size) {
-         if (device->load_grid_size_from_user_sgpr)
+         if (compiler_info->load_grid_size_from_user_sgpr)
             RADV_ADD_UD_ARG(state, 3, AC_ARG_VALUE, ac.num_work_groups, AC_UD_CS_GRID_SIZE);
          else
             RADV_ADD_UD_ARG(state, 2, AC_ARG_CONST_ADDR, ac.num_work_groups, AC_UD_CS_GRID_SIZE);
@@ -660,7 +655,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
          RADV_ADD_ARG(state, AC_ARG_SGPR, 1, AC_ARG_VALUE, ac.scratch_offset);
       }
 
-      if (pdev->info.compiler_info.local_invocation_ids_packed) {
+      if (compiler_info->ac->local_invocation_ids_packed) {
         RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_ids_packed);
      } else {
         RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_id_x);
@@ -830,11 +825,11 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
 
          declare_ngg_sgprs(state, info, ngg_needs_state_sgpr);
 
-         if (pdev->info.gfx_level >= GFX11 && has_shader_query)
+         if (gfx_level >= GFX11 && has_shader_query)
             RADV_ADD_UD_ARG(state, 1, AC_ARG_VALUE, ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA);
       }
 
-      if (previous_stage != MESA_SHADER_MESH || !pdev->info.mesh_fast_launch_2) {
+      if (previous_stage != MESA_SHADER_MESH || !compiler_info->hw.mesh_fast_launch_2) {
          if (gfx_level >= GFX12) {
             RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_vtx_offset, 0);
             RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_prim_id);
@@ -854,7 +849,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
       } else if (previous_stage == MESA_SHADER_TESS_EVAL) {
          declare_tes_input_vgprs(state);
       } else if (previous_stage == MESA_SHADER_MESH) {
-         declare_ms_input_vgprs(state, device);
+         declare_ms_input_vgprs(compiler_info, state);
       }
    } else {
       declare_global_input_sgprs(state, gfx_level, info, user_sgpr_info);
@@ -964,9 +959,10 @@ radv_gather_shader_args_debug_info(struct radv_shader_args_state *state, struct
 }
 
 void
-radv_declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
-                         const struct radv_shader_info *info, mesa_shader_stage stage, mesa_shader_stage previous_stage,
-                         struct radv_shader_args *args, struct radv_shader_debug_info *debug)
+radv_declare_shader_args(const struct radv_compiler_info *compiler_info,
+                         const struct radv_graphics_state_key *gfx_state, const struct radv_shader_info *info,
+                         mesa_shader_stage stage, mesa_shader_stage previous_stage, struct radv_shader_args *args,
+                         struct radv_shader_debug_info *debug)
 {
    struct radv_shader_args_state state = {
       .args = args,
@@ -975,7 +971,7 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
    struct user_sgpr_info user_sgpr_info = {0};
 
    if (!mesa_shader_stage_is_rt(stage)) {
-      declare_shader_args(&state, device, gfx_state, info, stage, previous_stage, NULL);
+      declare_shader_args(compiler_info, &state, gfx_state, info, stage, previous_stage, NULL);
 
       uint32_t num_user_sgprs = args->num_user_sgprs;
       if (info->loads_push_constants)
@@ -986,8 +982,7 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
          num_user_sgprs++;
       }
 
-      const struct radv_physical_device *pdev = radv_device_physical(device);
-      const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
+      const enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
       uint32_t available_sgprs = gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
       uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;
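
Review note: the user-SGPR budget computed at the end of the hunk above is easy to restate. Merged stages on GFX9+ get 32 user SGPRs, compute and task shaders (and everything pre-GFX9) get 16, and whatever is left after push constants, dynamic descriptors and set pointers can be spent on inlined push constants. The same arithmetic as a self-contained helper (illustrative; the driver computes it inline):

   static unsigned
   remaining_user_sgprs(enum amd_gfx_level gfx_level, mesa_shader_stage stage, unsigned num_user_sgprs)
   {
      /* Same rule as radv_declare_shader_args above. */
      const unsigned available =
         gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
      return available - num_user_sgprs;
   }

For example, a GFX11 vertex shader that already spent 2 SGPRs on push constants and 5 on descriptor-set pointers keeps 32 - 7 = 25 for inlining.
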
@@ -1012,13 +1007,13 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
       allocate_inline_push_consts(info, &user_sgpr_info);
    }
 
-   state.gather_debug_info = debug && device->keep_shader_info;
+   state.gather_debug_info = debug && compiler_info->debug.keep_shader_info;
    if (state.gather_debug_info) {
       state.ctx = ralloc_context(NULL);
       state.gather_debug_info &= !!state.ctx;
    }
 
-   declare_shader_args(&state, device, gfx_state, info, stage, previous_stage, &user_sgpr_info);
+   declare_shader_args(compiler_info, &state, gfx_state, info, stage, previous_stage, &user_sgpr_info);
 
    if (state.gather_debug_info)
       radv_gather_shader_args_debug_info(&state, debug);
@@ -1027,14 +1022,14 @@
 }
 
 void
-radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
+radv_declare_ps_epilog_args(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_key *key,
                             struct radv_shader_args *args)
 {
    struct radv_shader_args_state state = {
      .args = args,
    };
 
-   radv_init_shader_args(&state, device, MESA_SHADER_FRAGMENT);
+   radv_init_shader_args(compiler_info, &state, MESA_SHADER_FRAGMENT);
 
    /* Declare VGPR arguments for depth/stencil/sample exports. */
    if (key->export_depth)
diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h
index 30cef5ef19e..ed8669e76f2 100644
--- a/src/amd/vulkan/radv_shader_args.h
+++ b/src/amd/vulkan/radv_shader_args.h
@@ -135,15 +135,15 @@ radv_shader_args_from_ac(struct ac_shader_args *args)
 struct radv_graphics_state_key;
 struct radv_shader_info;
 struct radv_ps_epilog_key;
-struct radv_device;
 struct radv_shader_debug_info;
+struct radv_compiler_info;
 
-void radv_declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
-                              const struct radv_shader_info *info, mesa_shader_stage stage,
-                              mesa_shader_stage previous_stage, struct radv_shader_args *args,
+void radv_declare_shader_args(const struct radv_compiler_info *compiler_info,
+                              const struct radv_graphics_state_key *gfx_state, const struct radv_shader_info *info,
+                              mesa_shader_stage stage, mesa_shader_stage previous_stage, struct radv_shader_args *args,
                               struct radv_shader_debug_info *debug);
 
-void radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
+void radv_declare_ps_epilog_args(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_key *key,
                                  struct radv_shader_args *args);
 
 void radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args);
diff --git a/src/amd/vulkan/radv_shader_info.c b/src/amd/vulkan/radv_shader_info.c
index 7f72ec97cf0..1df39bfb780 100644
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -481,13 +481,12 @@ radv_compute_esgs_itemsize(enum amd_gfx_level gfx_level, uint32_t num_varyings)
 }
 
 static void
-gather_shader_info_ngg_query(struct radv_device *device, struct radv_shader_info *info)
+gather_shader_info_ngg_query(const struct radv_compiler_info *compiler_info, struct radv_shader_info *info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
-   info->gs.has_pipeline_stat_query = pdev->emulate_ngg_gs_query_pipeline_stat && info->stage == MESA_SHADER_GEOMETRY;
+   info->gs.has_pipeline_stat_query =
+      compiler_info->emulate_ngg_gs_query_pipeline_stat && info->stage == MESA_SHADER_GEOMETRY;
    info->has_xfb_query = !!info->so.enabled_stream_buffers_mask;
-   info->has_prim_query = device->cache_key.primitives_generated_query || info->has_xfb_query;
+   info->has_prim_query = compiler_info->primitives_generated_query || info->has_xfb_query;
 }
 
 uint64_t
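
Review note: gather_shader_info_ngg_query now reads emulate_ngg_gs_query_pipeline_stat and primitives_generated_query as plain cached scalars instead of chasing the device and its cache key. Where those scalars are filled in is outside this excerpt; a plausible device-creation-time initialization (assumed, mapping field-for-field from the old code paths and using the illustrative struct from the earlier note) would be:

   /* Hypothetical init; the real one lives elsewhere in the series. */
   static void
   init_query_flags_sketch(struct radv_compiler_info_sketch *ci, const struct radv_physical_device *pdev)
   {
      ci->emulate_ngg_gs_query_pipeline_stat = pdev->emulate_ngg_gs_query_pipeline_stat;
      ci->primitives_generated_query = pdev->cache_key.primitives_generated_query;
      ci->mesh_shader_queries = pdev->cache_key.mesh_shader_queries;
   }
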
@@ -525,12 +524,10 @@ radv_gather_unlinked_patch_io_mask(const uint64_t nir_io_mask, const uint32_t ni
 }
 
 static void
-gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
+gather_shader_info_vs(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
                       const struct radv_graphics_state_key *gfx_state, const struct radv_shader_stage_key *stage_key,
                       struct radv_shader_info *info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
    if (radv_use_vs_prolog(nir, gfx_state)) {
       info->vs.has_prolog = true;
       info->vs.dynamic_inputs = true;
@@ -567,19 +564,18 @@ gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
       info->vs.as_ls = true;
    } else if (info->next_stage == MESA_SHADER_GEOMETRY) {
      info->vs.as_es = true;
-      info->esgs_itemsize = radv_compute_esgs_itemsize(pdev->info.gfx_level, info->vs.num_linked_outputs);
+      info->esgs_itemsize = radv_compute_esgs_itemsize(compiler_info->ac->gfx_level, info->vs.num_linked_outputs);
    }
 
    if (info->is_ngg && (info->next_stage == MESA_SHADER_FRAGMENT || info->next_stage == MESA_SHADER_NONE)) {
-      gather_shader_info_ngg_query(device, info);
+      gather_shader_info_ngg_query(compiler_info, info);
    }
 }
 
 static void
-gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
+gather_shader_info_tcs(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
                        const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
    ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
    nir_tcs_info tcs_info;
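
Review note: the two helpers above show the mechanical shape of the whole conversion: drop the radv_device parameter, drop the radv_device_physical() local, and read gfx_level through compiler_info->ac. Before/after on a hypothetical helper (not from the patch), for reviewers skimming the remaining hunks:

   /* Old shape. */
   static void
   gather_example_old(struct radv_device *device, struct radv_shader_info *info)
   {
      const struct radv_physical_device *pdev = radv_device_physical(device);
      info->user_data_0 = radv_get_user_data_0(pdev->info.gfx_level, info);
   }

   /* New shape. */
   static void
   gather_example_new(const struct radv_compiler_info *compiler_info, struct radv_shader_info *info)
   {
      info->user_data_0 = radv_get_user_data_0(compiler_info->ac->gfx_level, info);
   }
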
@@ -597,7 +593,7 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
       info->tcs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
 
    if (gfx_state->ts.patch_control_points) {
-      radv_get_tess_wg_info(pdev, &info->tcs.io_info, nir->info.tess.tcs_vertices_out,
+      radv_get_tess_wg_info(compiler_info, &info->tcs.io_info, nir->info.tess.tcs_vertices_out,
                             gfx_state->ts.patch_control_points,
                             /* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */
                             info->tcs.num_linked_inputs, &info->num_tess_patches, &info->tcs.lds_size);
@@ -605,10 +601,9 @@
 }
 
 static void
-gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
+gather_shader_info_tes(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
+                       struct radv_shader_info *info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
    info->gs_inputs_read = ~0ULL;
    info->tes._primitive_mode = nir->info.tess._primitive_mode;
    info->tes.spacing = nir->info.tess.spacing;
@@ -629,11 +624,11 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct
 
    if (info->next_stage == MESA_SHADER_GEOMETRY) {
      info->tes.as_es = true;
-      info->esgs_itemsize = radv_compute_esgs_itemsize(pdev->info.gfx_level, info->tes.num_linked_outputs);
+      info->esgs_itemsize = radv_compute_esgs_itemsize(compiler_info->ac->gfx_level, info->tes.num_linked_outputs);
    }
 
    if (info->is_ngg && (info->next_stage == MESA_SHADER_FRAGMENT || info->next_stage == MESA_SHADER_NONE)) {
-      gather_shader_info_ngg_query(device, info);
+      gather_shader_info_ngg_query(compiler_info, info);
    }
 }
 
@@ -678,10 +673,9 @@ radv_get_esgs_gsvs_ring_size(const struct radv_device *device, struct radv_shade
 }
 
 void
-radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info,
+radv_get_legacy_gs_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
                         struct radv_shader_info *gs_info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_legacy_gs_info *out = &gs_info->legacy_gs_info;
    const unsigned esgs_vertex_stride = es_info ? es_info->esgs_itemsize : out->esgs_itemsize;
    ac_legacy_gs_subgroup_info info;
 
@@ -689,7 +683,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
    ac_legacy_gs_compute_subgroup_info(gs_info->gs.input_prim, gs_info->gs.vertices_out, gs_info->gs.invocations,
                                       esgs_vertex_stride, &info);
 
-   const uint32_t lds_granularity = ac_shader_get_lds_alloc_granularity(pdev->info.gfx_level);
+   const uint32_t lds_granularity = ac_shader_get_lds_alloc_granularity(compiler_info->ac->gfx_level);
    const uint32_t total_lds_bytes = align(info.esgs_lds_size * 4, lds_granularity);
 
    out->gs_inst_prims_in_subgroup = info.gs_inst_prims_in_subgroup;
@@ -699,10 +693,9 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
 }
 
 static void
-gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
+gather_shader_info_gs(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
+                      struct radv_shader_info *info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-
    info->gs.vertices_in = nir->info.gs.vertices_in;
    info->gs.vertices_out = nir->info.gs.vertices_out;
    info->gs.input_prim = nir->info.gs.input_primitive;
@@ -713,13 +706,14 @@ gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct
       info->gs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
 
    if (info->is_ngg)
-      gather_shader_info_ngg_query(device, info);
+      gather_shader_info_ngg_query(compiler_info, info);
    else
-      info->legacy_gs_info.esgs_itemsize = radv_compute_esgs_itemsize(pdev->info.gfx_level, info->gs.num_linked_inputs);
+      info->legacy_gs_info.esgs_itemsize =
+         radv_compute_esgs_itemsize(compiler_info->ac->gfx_level, info->gs.num_linked_inputs);
 }
 
 static void
-gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir,
+gather_shader_info_mesh(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
                         const struct radv_shader_stage_key *stage_key, struct radv_shader_info *info)
 {
    struct gfx10_ngg_info *ngg_info = &info->ngg_info;
@@ -760,17 +754,17 @@ gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir,
    ngg_info->prim_amp_factor = nir->info.mesh.max_primitives_out;
    ngg_info->vgt_esgs_ring_itemsize = 1;
 
-   info->ms.has_query = device->cache_key.mesh_shader_queries;
+   info->ms.has_query = compiler_info->mesh_shader_queries;
    info->ms.has_task = stage_key->has_task_shader;
 }
 
 static void
-calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
+calc_mesh_workgroup_size(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
+                         struct radv_shader_info *info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
    unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
 
-   if (pdev->info.mesh_fast_launch_2) {
+   if (compiler_info->hw.mesh_fast_launch_2) {
      /* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */
      info->workgroup_size = api_workgroup_size;
    } else {
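
Review note: in calc_mesh_workgroup_size above, ac_compute_cs_workgroup_size() is called with variable-size workgroups disabled, so the API workgroup size reduces to the flattened local size, which the mesh_fast_launch_2 path then uses directly. A minimal equivalent of that reduction for reference (an assumption based on the call's arguments, not a copy of the ac helper):

   static unsigned
   api_workgroup_size_example(const uint16_t local_size[3])
   {
      /* x * y * z, matching ac_compute_cs_workgroup_size(..., false, UINT32_MAX). */
      return (unsigned)local_size[0] * local_size[1] * local_size[2];
   }
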
@@ -884,7 +878,7 @@ gather_shader_info_rt(const nir_shader *nir, struct radv_shader_info *info)
 }
 
 static void
-gather_shader_info_task(struct radv_device *device, const nir_shader *nir,
+gather_shader_info_task(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
                         const struct radv_shader_stage_key *stage_key, struct radv_shader_info *info)
 {
    /* Task shaders always need these for the I/O lowering even if the API shader doesn't actually
@@ -906,7 +900,7 @@ gather_shader_info_task(struct radv_device *device, const nir_shader *nir,
    info->cs.linear_taskmesh_dispatch =
       nir->info.mesh.ts_mesh_dispatch_dimensions[1] == 1 && nir->info.mesh.ts_mesh_dispatch_dimensions[2] == 1;
 
-   info->cs.has_query = device->cache_key.mesh_shader_queries;
+   info->cs.has_query = compiler_info->mesh_shader_queries;
 }
 
 static uint32_t
@@ -1000,12 +994,11 @@ radv_nir_shader_info_init(mesa_shader_stage stage, mesa_shader_stage next_stage,
 }
 
 void
-radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
+radv_nir_shader_info_pass(const struct radv_compiler_info *compiler_info, const struct nir_shader *nir,
                           const struct radv_shader_layout *layout, const struct radv_shader_stage_key *stage_key,
                           const struct radv_graphics_state_key *gfx_state,
                           const enum radv_pipeline_type pipeline_type, bool consider_force_vrs,
                           struct radv_shader_info *info)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
 
    if (layout->use_dynamic_descriptors) {
@@ -1060,11 +1053,11 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
 
    /* Used by compute and mesh shaders. Mesh shaders must always declare this before GFX11. */
    info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS) ||
-                             (nir->info.stage == MESA_SHADER_MESH && pdev->info.gfx_level < GFX11);
+                             (nir->info.stage == MESA_SHADER_MESH && compiler_info->ac->gfx_level < GFX11);
    info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
                                         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
                                         BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |
-                                        radv_shader_should_clear_lds(device, nir);
+                                        radv_shader_should_clear_lds(compiler_info, nir);
    info->cs.derivative_group = nir->info.derivative_group;
 
    if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
@@ -1073,8 +1066,9 @@
         info->cs.block_size[i] = nir->info.workgroup_size[i];
    }
 
-   info->user_data_0 = radv_get_user_data_0(pdev->info.gfx_level, info);
-   info->merged_shader_compiled_separately = radv_is_merged_shader_compiled_separately(pdev->info.gfx_level, info);
+   info->user_data_0 = radv_get_user_data_0(compiler_info->ac->gfx_level, info);
+   info->merged_shader_compiled_separately =
+      radv_is_merged_shader_compiled_separately(compiler_info->ac->gfx_level, info);
    info->force_indirect_descriptors = info->merged_shader_compiled_separately || stage_key->indirect_bindable;
    info->descriptor_heap = stage_key->descriptor_heap;
 
@@ -1082,25 +1076,25 @@
    case MESA_SHADER_COMPUTE:
      break;
    case MESA_SHADER_TASK:
-      gather_shader_info_task(device, nir, stage_key, info);
+      gather_shader_info_task(compiler_info, nir, stage_key, info);
      break;
    case MESA_SHADER_FRAGMENT:
-      gather_shader_info_fs(pdev->info.gfx_level, nir, gfx_state, info);
+      gather_shader_info_fs(compiler_info->ac->gfx_level, nir, gfx_state, info);
      break;
    case MESA_SHADER_GEOMETRY:
-      gather_shader_info_gs(device, nir, info);
+      gather_shader_info_gs(compiler_info, nir, info);
      break;
    case MESA_SHADER_TESS_EVAL:
-      gather_shader_info_tes(device, nir, info);
+      gather_shader_info_tes(compiler_info, nir, info);
      break;
    case MESA_SHADER_TESS_CTRL:
-      gather_shader_info_tcs(device, nir, gfx_state, info);
+      gather_shader_info_tcs(compiler_info, nir, gfx_state, info);
      break;
    case MESA_SHADER_VERTEX:
-      gather_shader_info_vs(device, nir, gfx_state, stage_key, info);
+      gather_shader_info_vs(compiler_info, nir, gfx_state, stage_key, info);
      break;
    case MESA_SHADER_MESH:
-      gather_shader_info_mesh(device, nir, stage_key, info);
+      gather_shader_info_mesh(compiler_info, nir, stage_key, info);
      break;
    default:
      if (mesa_shader_stage_is_rt(nir->info.stage))
@@ -1111,7 +1105,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
    info->wave_size = nir->info.min_subgroup_size;
    assert(info->wave_size == nir->info.max_subgroup_size);
    assert(info->wave_size == 32 || info->wave_size == 64);
-   assert(pdev->info.gfx_level >= GFX10 || info->wave_size == 64);
+   assert(compiler_info->ac->gfx_level >= GFX10 || info->wave_size == 64);
    assert(nir->info.stage != MESA_SHADER_GEOMETRY || info->is_ngg || info->wave_size == 64);
 
    switch (nir->info.stage) {
@@ -1140,7 +1134,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
    case MESA_SHADER_TESS_CTRL:
      if (gfx_state->ts.patch_control_points) {
         info->workgroup_size =
-            ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
+            ac_compute_lshs_workgroup_size(compiler_info->ac->gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
                                            gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out);
      } else {
         /* Set the maximum possible value when the workgroup size can't be determined. */
@@ -1168,7 +1162,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
          * Set the maximum possible value by default, this will be optimized during linking if
          * possible.
          */
-         if (pdev->info.gfx_level <= GFX8)
+         if (compiler_info->ac->gfx_level <= GFX8)
            info->workgroup_size = info->wave_size;
         else
            info->workgroup_size = 256;
@@ -1181,7 +1175,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
      }
      break;
    case MESA_SHADER_MESH:
-      calc_mesh_workgroup_size(device, nir, info);
+      calc_mesh_workgroup_size(compiler_info, nir, info);
      break;
    default:
      /* FS always operates without workgroups. Other stages are computed during linking but assume
@@ -1255,11 +1249,10 @@ gfx10_get_ngg_vert_prim_lds_size(const struct radv_shader_info *es_info, const s
 }
 
 void
-gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
-                   struct gfx10_ngg_info *out)
+gfx10_get_ngg_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
+                   struct radv_shader_info *gs_info, struct gfx10_ngg_info *out)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
-   const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
+   const enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
    const struct radv_shader_info *stage_info = gs_info ? gs_info : es_info;
    const unsigned gs_num_invocations = gs_info ? MAX2(gs_info->gs.invocations, 1) : 1;
    const unsigned input_prim = radv_get_pre_rast_input_topology(es_info, gs_info);
@@ -1301,10 +1294,9 @@ gfx10_ngg_set_esgs_ring_itemsize(struct radv_shader_info *es_info, struct radv_s
 }
 
 static void
-radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage *ngg_stage,
+radv_determine_ngg_settings(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *ngg_stage,
                             struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
    uint64_t ps_inputs_read;
 
    assert(ngg_stage->info.is_ngg);
@@ -1337,11 +1329,12 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage
    }
 
    ngg_stage->info.has_ngg_culling =
-      radv_consider_culling(pdev, ngg_stage->nir, ps_inputs_read, num_vertices_per_prim, &ngg_stage->info);
+      radv_consider_culling(compiler_info, ngg_stage->nir, ps_inputs_read, num_vertices_per_prim, &ngg_stage->info);
 
    if (ngg_stage->stage != MESA_SHADER_GEOMETRY) {
      nir_function_impl *impl = nir_shader_get_entrypoint(ngg_stage->nir);
-      ngg_stage->info.has_ngg_early_prim_export = pdev->info.gfx_level < GFX11 && exec_list_is_singular(&impl->body);
+      ngg_stage->info.has_ngg_early_prim_export =
+         compiler_info->ac->gfx_level < GFX11 && exec_list_is_singular(&impl->body);
 
      /* NGG passthrough mode should be disabled when culling and when the vertex shader
       * exports the primitive ID.
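
Review note: the truncated comment at the end of the hunk above states the rule that the code immediately after it (outside this hunk) applies: NGG passthrough stays off while culling, and also when the vertex shader exports the primitive ID. As a predicate, paraphrasing the stated rule rather than quoting the patch:

   static bool
   ngg_passthrough_allowed(bool has_ngg_culling, bool vs_exports_prim_id)
   {
      return !has_ngg_culling && !vs_exports_prim_id;
   }
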
@@ -1353,10 +1346,9 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage
 }
 
 static void
-radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *stages,
+radv_link_shaders_info(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stages,
                        const struct radv_graphics_state_key *gfx_state)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
    struct radv_shader_stage *vs_stage = stages[MESA_SHADER_VERTEX].nir ? &stages[MESA_SHADER_VERTEX] : NULL;
    struct radv_shader_stage *tcs_stage = stages[MESA_SHADER_TESS_CTRL].nir ? &stages[MESA_SHADER_TESS_CTRL] : NULL;
    struct radv_shader_stage *tes_stage = stages[MESA_SHADER_TESS_EVAL].nir ? &stages[MESA_SHADER_TESS_EVAL] : NULL;
@@ -1382,15 +1374,15 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *sta
      const bool ps_prim_id_in = !fs_stage || fs_stage->info.ps.prim_id_input;
      const bool ps_clip_dists_in = !fs_stage || !!fs_stage->info.ps.input_clips_culls_mask;
 
-      radv_set_vs_output_param(pdev->info.gfx_level, prerast_stage->nir, gfx_state, &prerast_stage->info, ps_prim_id_in,
-                               ps_clip_dists_in);
+      radv_set_vs_output_param(compiler_info->ac->gfx_level, prerast_stage->nir, gfx_state, &prerast_stage->info,
+                               ps_prim_id_in, ps_clip_dists_in);
    }
 
    if (prerast_stage && !ms_stage) {
      /* Compute NGG info (GFX10+) or GS info. */
      if (ngg_stage) {
         /* Determine other NGG settings like culling. */
-         radv_determine_ngg_settings(device, ngg_stage, fs_stage, gfx_state);
+         radv_determine_ngg_settings(compiler_info, ngg_stage, fs_stage, gfx_state);
 
         if (es_stage) {
            gfx10_ngg_set_esgs_ring_itemsize(&es_stage->info, gs_stage ? &gs_stage->info : NULL,
@@ -1412,11 +1404,11 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *sta
      vs_stage->info.vs.tcs_inputs_via_lds = tcs_stage->nir->info.inputs_read;
 
      if (gfx_state->ts.patch_control_points) {
-         vs_stage->info.workgroup_size =
-            ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
-                                           gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
+         vs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size(
+            compiler_info->ac->gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
+            gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
 
-         if (!pdev->use_llvm) {
+         if (!compiler_info->debug.use_llvm) {
           /* When the number of TCS input and output vertices are the same (typically 3):
            * - There is an equal amount of LS and HS invocations
            * - In case of merged LSHS shaders, the LS and HS halves of the shader always process
@@ -1427,7 +1419,7 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *sta
            * instruction dominating another with a different mode.
            */
           vs_stage->info.vs.tcs_in_out_eq =
-              pdev->info.gfx_level >= GFX9 &&
+              compiler_info->ac->gfx_level >= GFX9 &&
              gfx_state->ts.patch_control_points == tcs_stage->info.tcs.tcs_vertices_out &&
              vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode;
@@ -1493,14 +1485,12 @@ radv_nir_shader_info_merge(const struct radv_shader_stage *src, struct radv_shad
 }
 
 void
-radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
-                          struct radv_shader_stage *stages)
+radv_nir_shader_info_link(const struct radv_compiler_info *compiler_info,
+                          const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages)
 {
-   const struct radv_physical_device *pdev = radv_device_physical(device);
+   radv_link_shaders_info(compiler_info, stages, gfx_state);
 
-   radv_link_shaders_info(device, stages, gfx_state);
-
-   if (pdev->info.gfx_level >= GFX9) {
+   if (compiler_info->ac->gfx_level >= GFX9) {
      /* Merge shader info for VS+TCS. */
      if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_TESS_CTRL].nir) {
         radv_nir_shader_info_merge(&stages[MESA_SHADER_VERTEX], &stages[MESA_SHADER_TESS_CTRL]);
diff --git a/src/amd/vulkan/radv_shader_info.h b/src/amd/vulkan/radv_shader_info.h
index dfe10568220..b2950b54891 100644
--- a/src/amd/vulkan/radv_shader_info.h
+++ b/src/amd/vulkan/radv_shader_info.h
@@ -327,7 +327,7 @@ struct radv_shader_regs {
 
 void radv_nir_shader_info_init(mesa_shader_stage stage, mesa_shader_stage next_stage, struct radv_shader_info *info);
 
-void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
+void radv_nir_shader_info_pass(const struct radv_compiler_info *compiler_info, const struct nir_shader *nir,
                                const struct radv_shader_layout *layout, const struct radv_shader_stage_key *stage_key,
                                const struct radv_graphics_state_key *gfx_state,
                                const enum radv_pipeline_type pipeline_type, bool consider_force_vrs,
@@ -335,16 +335,17 @@ void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shad
 void radv_get_esgs_gsvs_ring_size(const struct radv_device *device, struct radv_shader_regs *regs,
                                   const struct radv_shader_info *es_info, const struct radv_shader_info *gs_info);
 
-void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info);
+void radv_get_legacy_gs_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
+                             struct radv_shader_info *gs_info);
 
-void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info,
+void gfx10_get_ngg_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
                         struct radv_shader_info *gs_info, struct gfx10_ngg_info *out);
 
 void gfx10_ngg_set_esgs_ring_itemsize(struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
                                       struct gfx10_ngg_info *out);
 
-void radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
-                               struct radv_shader_stage *stages);
+void radv_nir_shader_info_link(const struct radv_compiler_info *compiler_info,
+                               const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages);
 
 enum ac_hw_stage radv_select_hw_stage(const struct radv_shader_info *const info, const enum amd_gfx_level gfx_level);
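
Review note: radv_shader_info.h now matches the header discipline seen in radv_shader_args.h above: compiler-facing prototypes take const struct radv_compiler_info * and stop naming radv_device. A consumer translation unit then needs nothing device-related; a hypothetical example (function name invented for illustration):

   #include "radv_shader_info.h"

   static void
   link_stages_example(const struct radv_compiler_info *compiler_info,
                       const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages)
   {
      radv_nir_shader_info_link(compiler_info, gfx_state, stages);
   }
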
diff --git a/src/amd/vulkan/radv_shader_object.c b/src/amd/vulkan/radv_shader_object.c
index 198e47872cc..1c1dcc7a68e 100644
--- a/src/amd/vulkan/radv_shader_object.c
+++ b/src/amd/vulkan/radv_shader_object.c
@@ -170,7 +170,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r
       struct radv_shader_debug_info debug[MESA_VULKAN_SHADER_STAGES] = {0};
       struct radv_shader_debug_info gs_copy_debug = {0};
 
-      radv_graphics_shaders_compile(device, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
+      radv_graphics_shaders_compile(&device->compiler_info, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
                                     &gs_copy_debug, &shader_obj->gs.copy_binary);
 
       radv_graphics_shaders_create(device, NULL, true, shaders, binaries, debug, &shader_obj->gs.copy_shader,
                                    shader_obj->gs.copy_binary, &gs_copy_debug);
@@ -202,7 +202,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r
       radv_shader_stage_init(pCreateInfo, &stages[stage]);
       stages[stage].next_stage = next_stage;
 
-      radv_graphics_shaders_compile(device, NULL, stages, &gfx_state, false, false, false, NULL, false, debug,
+      radv_graphics_shaders_compile(&device->compiler_info, NULL, stages, &gfx_state, false, false, false, NULL, false, debug,
                                     binaries, &gs_copy_debug, &shader_obj->gs.copy_binary);
 
       radv_graphics_shaders_create(device, NULL, true, shaders, binaries, debug, &shader_obj->gs.copy_shader,
                                    shader_obj->gs.copy_binary, &gs_copy_debug);
@@ -251,7 +251,7 @@ radv_shader_object_init_compute(struct radv_shader_object *shader_obj, struct ra
    radv_shader_stage_init(pCreateInfo, &stage);
 
    struct radv_shader_debug_info cs_dbg = {0};
-   struct radv_shader_binary *cs_binary = radv_compile_cs(device, &stage, false, false, false, &cs_dbg);
+   struct radv_shader_binary *cs_binary = radv_compile_cs(&device->compiler_info, &stage, false, false, false, &cs_dbg);
    struct radv_shader *cs_shader = radv_shader_create(device, NULL, cs_binary, true, &cs_dbg);
 
    ralloc_free(stage.nir);
@@ -474,7 +474,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con
    struct radv_shader_binary *gs_copy_binary = NULL;
    struct radv_shader_debug_info gs_copy_debug = {0};
 
-   radv_graphics_shaders_compile(device, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
+   radv_graphics_shaders_compile(&device->compiler_info, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
                                  &gs_copy_debug, &gs_copy_binary);
 
    radv_graphics_shaders_create(device, NULL, true, shaders, binaries, debug, &gs_copy_shader, gs_copy_binary,
                                 &gs_copy_debug);
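
Review note: throughout radv_shader_object.c the call sites keep their radv_device and simply pass &device->compiler_info at the compile boundary. Condensed shape of the compute path above (lines taken from the hunk; the surrounding function and error handling are omitted):

   struct radv_shader_debug_info cs_dbg = {0};
   struct radv_shader_binary *cs_binary =
      radv_compile_cs(&device->compiler_info, &stage, false, false, false, &cs_dbg);
   struct radv_shader *cs_shader = radv_shader_create(device, NULL, cs_binary, true, &cs_dbg);

   ralloc_free(stage.nir);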