mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-07 09:18:04 +02:00
radv: use radv_compiler_info everywhere during compilation
This prevents the compiler to access the logical/physical devices and the instance during compilation. The main goal is to make it more robust against cache related issues when something isn't hashed correctly (this used to happen a lot in the past). Also it would be much more robust for sharing binaries between two GPUs in the same generation (eg. Vangogh/Rembrandt) because everything needed for compilation is in radv_compiler_info. There is still some work to do to achieve that but it's making good progress. Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40992>
This commit is contained in:
parent
4a91fd8bab
commit
371316e989
39 changed files with 831 additions and 929 deletions
|
|
@ -17,17 +17,17 @@ extern "C" {
|
|||
#endif
|
||||
|
||||
typedef struct nir_shader nir_shader;
|
||||
struct radeon_info;
|
||||
struct radv_shader_stage;
|
||||
struct radv_shader_info;
|
||||
struct radv_shader_args;
|
||||
struct radv_shader_layout;
|
||||
struct radv_device;
|
||||
struct radv_graphics_state_key;
|
||||
struct radv_ps_epilog_key;
|
||||
struct radv_debug_nir;
|
||||
struct radv_compiler_info;
|
||||
|
||||
bool radv_nir_lower_descriptors(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage);
|
||||
bool radv_nir_lower_descriptors(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_shader_stage *stage);
|
||||
|
||||
bool radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, const struct radv_shader_stage *stage,
|
||||
const struct radv_graphics_state_key *gfx_state, uint32_t address32_hi);
|
||||
|
|
@ -36,10 +36,11 @@ bool radv_nir_lower_hit_attrib_derefs(nir_shader *shader);
|
|||
|
||||
bool radv_nir_lower_ray_payload_derefs(nir_shader *shader, uint32_t offset);
|
||||
|
||||
bool radv_nir_lower_ray_queries(nir_shader *shader, struct radv_device *device);
|
||||
bool radv_nir_lower_ray_queries(nir_shader *shader, const struct radv_compiler_info *compiler_info);
|
||||
|
||||
bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_stage,
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radeon_info *gpu_info);
|
||||
bool radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_shader_stage *vs_stage,
|
||||
const struct radv_graphics_state_key *gfx_state);
|
||||
|
||||
bool radv_nir_optimize_vs_inputs_to_const(nir_shader *shader, const struct radv_graphics_state_key *gfx_state);
|
||||
|
||||
|
|
@ -63,7 +64,7 @@ unsigned radv_map_io_driver_location(unsigned semantic);
|
|||
|
||||
void radv_nir_lower_io(nir_shader *nir);
|
||||
|
||||
bool radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage);
|
||||
bool radv_nir_lower_io_to_mem(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage);
|
||||
|
||||
bool radv_nir_lower_cooperative_matrix(nir_shader *shader, enum amd_gfx_level gfx_level, unsigned wave_size);
|
||||
|
||||
|
|
@ -95,7 +96,7 @@ bool radv_nir_opt_tid_function(nir_shader *shader, const radv_nir_opt_tid_functi
|
|||
bool radv_nir_opt_fs_builtins(nir_shader *shader, const struct radv_graphics_state_key *gfx_state,
|
||||
unsigned vgt_outprim_type);
|
||||
|
||||
bool radv_nir_lower_immediate_samplers(nir_shader *shader, struct radv_device *device,
|
||||
bool radv_nir_lower_immediate_samplers(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_shader_stage *stage);
|
||||
|
||||
void radv_nir_lower_callee_signature(nir_function *function);
|
||||
|
|
|
|||
|
|
@ -10,9 +10,7 @@
|
|||
#include "nir_builder.h"
|
||||
#include "radv_descriptor_set.h"
|
||||
#include "radv_descriptors.h"
|
||||
#include "radv_device.h"
|
||||
#include "radv_nir.h"
|
||||
#include "radv_physical_device.h"
|
||||
#include "radv_shader.h"
|
||||
#include "radv_shader_args.h"
|
||||
#include "sid.h"
|
||||
|
|
@ -676,21 +674,22 @@ lower_descriptors_tex(nir_builder *b, lower_descriptors_state *state, nir_tex_in
|
|||
}
|
||||
|
||||
bool
|
||||
radv_nir_lower_descriptors(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
|
||||
radv_nir_lower_descriptors(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_shader_stage *stage)
|
||||
{
|
||||
bool progress = false;
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
lower_descriptors_state state = {
|
||||
.gfx_level = pdev->info.gfx_level,
|
||||
.address32_hi = pdev->info.address32_hi,
|
||||
.sampled_image_desc_size = radv_get_sampled_image_desc_size(pdev),
|
||||
.combined_image_sampler_desc_size = radv_get_combined_image_sampler_desc_size(pdev),
|
||||
.combined_image_sampler_offset = radv_get_combined_image_sampler_offset(pdev),
|
||||
.disable_aniso_single_level = pdev->cache_key.disable_aniso_single_level,
|
||||
.has_image_load_dcc_bug = pdev->info.compiler_info.has_image_load_dcc_bug,
|
||||
.gfx_level = compiler_info->ac->gfx_level,
|
||||
.address32_hi = compiler_info->hw.address32_hi,
|
||||
.sampled_image_desc_size = compiler_info->sampled_image_desc_size,
|
||||
.combined_image_sampler_desc_size = compiler_info->combined_image_sampler_desc_size,
|
||||
.combined_image_sampler_offset = compiler_info->combined_image_sampler_offset,
|
||||
.disable_aniso_single_level =
|
||||
compiler_info->cache_key->disable_aniso_single_level && compiler_info->ac->gfx_level < GFX8,
|
||||
.has_image_load_dcc_bug = compiler_info->ac->has_image_load_dcc_bug,
|
||||
.disable_tg4_trunc_coord =
|
||||
!pdev->info.compiler_info.conformant_trunc_coord && !pdev->cache_key.disable_trunc_coord,
|
||||
!compiler_info->ac->conformant_trunc_coord && !compiler_info->cache_key->disable_trunc_coord,
|
||||
.args = &stage->args,
|
||||
.info = &stage->info,
|
||||
.layout = &stage->layout,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "radv_descriptor_set.h"
|
||||
#include "radv_device.h"
|
||||
#include "radv_nir.h"
|
||||
#include "radv_physical_device.h"
|
||||
#include "radv_sampler.h"
|
||||
|
|
@ -19,8 +18,7 @@
|
|||
* forbidden).
|
||||
*/
|
||||
typedef struct {
|
||||
const struct radv_device *device;
|
||||
bool disable_tg4_trunc_coord;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
const struct radv_shader_layout *layout;
|
||||
} lower_immediate_samplers_state;
|
||||
|
||||
|
|
@ -31,8 +29,10 @@ lower_immediate_samplers(nir_builder *b, nir_tex_instr *tex, void *cb_data)
|
|||
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
const bool disable_tg4_trunc_coord =
|
||||
!state->compiler_info->ac->conformant_trunc_coord && !state->compiler_info->cache_key->disable_trunc_coord;
|
||||
const uint32_t dword0_mask =
|
||||
tex->op == nir_texop_tg4 && state->disable_tg4_trunc_coord ? C_008F30_TRUNC_COORD : 0xffffffffu;
|
||||
tex->op == nir_texop_tg4 && disable_tg4_trunc_coord ? C_008F30_TRUNC_COORD : 0xffffffffu;
|
||||
|
||||
if (tex->embedded_sampler) {
|
||||
const struct vk_sampler_state_array *embedded_samplers = &state->layout->embedded_samplers;
|
||||
|
|
@ -42,7 +42,7 @@ lower_immediate_samplers(nir_builder *b, nir_tex_instr *tex, void *cb_data)
|
|||
assert(sampler_idx < embedded_samplers->sampler_count);
|
||||
const struct vk_sampler_state *sampler_state = &embedded_samplers->samplers[sampler_idx];
|
||||
|
||||
radv_make_sampler_descriptor(state->device, sampler_state, desc);
|
||||
radv_make_sampler_descriptor(state->compiler_info, sampler_state, desc);
|
||||
|
||||
nir_tex_instr_add_src(tex, nir_tex_src_sampler_handle,
|
||||
nir_imm_ivec4(b, desc[0] & dword0_mask, desc[1], desc[2], desc[3]));
|
||||
|
|
@ -89,14 +89,11 @@ lower_immediate_samplers(nir_builder *b, nir_tex_instr *tex, void *cb_data)
|
|||
}
|
||||
|
||||
bool
|
||||
radv_nir_lower_immediate_samplers(nir_shader *shader, struct radv_device *device, const struct radv_shader_stage *stage)
|
||||
radv_nir_lower_immediate_samplers(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_shader_stage *stage)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
lower_immediate_samplers_state state = {
|
||||
.device = device,
|
||||
.disable_tg4_trunc_coord =
|
||||
!pdev->info.compiler_info.conformant_trunc_coord && !pdev->cache_key.disable_trunc_coord,
|
||||
.compiler_info = compiler_info,
|
||||
.layout = &stage->layout,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -10,9 +10,7 @@
|
|||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
#include "nir_tcs_info.h"
|
||||
#include "radv_device.h"
|
||||
#include "radv_nir.h"
|
||||
#include "radv_physical_device.h"
|
||||
#include "radv_shader.h"
|
||||
|
||||
static int
|
||||
|
|
@ -85,9 +83,8 @@ radv_map_io_driver_location(unsigned semantic)
|
|||
}
|
||||
|
||||
bool
|
||||
radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *stage)
|
||||
radv_nir_lower_io_to_mem(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_shader_info *info = &stage->info;
|
||||
ac_nir_map_io_driver_location map_input = info->inputs_linked ? NULL : radv_map_io_driver_location;
|
||||
ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
|
||||
|
|
@ -95,16 +92,16 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
|
|||
|
||||
if (nir->info.stage == MESA_SHADER_VERTEX) {
|
||||
if (info->vs.as_ls) {
|
||||
NIR_PASS(_, nir, ac_nir_lower_ls_outputs_to_mem, map_output, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
|
||||
info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
|
||||
NIR_PASS(_, nir, ac_nir_lower_ls_outputs_to_mem, map_output, compiler_info->ac->gfx_level,
|
||||
info->vs.tcs_in_out_eq, info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
|
||||
return true;
|
||||
} else if (info->vs.as_es) {
|
||||
NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
|
||||
NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, compiler_info->ac->gfx_level, info->esgs_itemsize,
|
||||
info->gs_inputs_read);
|
||||
return true;
|
||||
}
|
||||
} else if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
|
||||
NIR_PASS(_, nir, ac_nir_lower_hs_inputs_to_mem, map_input, pdev->info.gfx_level, info->vs.tcs_in_out_eq,
|
||||
NIR_PASS(_, nir, ac_nir_lower_hs_inputs_to_mem, map_input, compiler_info->ac->gfx_level, info->vs.tcs_in_out_eq,
|
||||
info->vs.tcs_inputs_via_temp, info->vs.tcs_inputs_via_lds);
|
||||
|
||||
nir_tcs_info tcs_info;
|
||||
|
|
@ -113,21 +110,21 @@ radv_nir_lower_io_to_mem(struct radv_device *device, struct radv_shader_stage *s
|
|||
ac_nir_get_tess_io_info(nir, &tcs_info, info->tcs.tes_inputs_read, info->tcs.tes_patch_inputs_read, map_output,
|
||||
true, &tess_io_info);
|
||||
|
||||
NIR_PASS(_, nir, ac_nir_lower_hs_outputs_to_mem, &tcs_info, &tess_io_info, map_output, pdev->info.gfx_level,
|
||||
info->wave_size);
|
||||
NIR_PASS(_, nir, ac_nir_lower_hs_outputs_to_mem, &tcs_info, &tess_io_info, map_output,
|
||||
compiler_info->ac->gfx_level, info->wave_size);
|
||||
|
||||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_TESS_EVAL) {
|
||||
NIR_PASS(_, nir, ac_nir_lower_tes_inputs_to_mem, map_input);
|
||||
|
||||
if (info->tes.as_es) {
|
||||
NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, pdev->info.gfx_level, info->esgs_itemsize,
|
||||
NIR_PASS(_, nir, ac_nir_lower_es_outputs_to_mem, map_output, compiler_info->ac->gfx_level, info->esgs_itemsize,
|
||||
info->gs_inputs_read);
|
||||
}
|
||||
|
||||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_GEOMETRY) {
|
||||
NIR_PASS(_, nir, ac_nir_lower_gs_inputs_to_mem, map_input, pdev->info.gfx_level, false);
|
||||
NIR_PASS(_, nir, ac_nir_lower_gs_inputs_to_mem, map_input, compiler_info->ac->gfx_level, false);
|
||||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_TASK) {
|
||||
ac_nir_lower_task_outputs_to_mem(nir, info->cs.has_query);
|
||||
|
|
|
|||
|
|
@ -154,7 +154,7 @@ struct ray_query_vars {
|
|||
|
||||
static void
|
||||
init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray_query_vars *dst, const char *base_name,
|
||||
const struct radv_physical_device *pdev)
|
||||
const struct radv_compiler_info *compiler_info)
|
||||
{
|
||||
memset(dst, 0, sizeof(*dst));
|
||||
|
||||
|
|
@ -162,23 +162,23 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
|
|||
shader->info.workgroup_size[0] * shader->info.workgroup_size[1] * shader->info.workgroup_size[2];
|
||||
uint32_t shared_stack_entries = shader->info.ray_queries == 1 ? 16 : 8;
|
||||
/* ds_bvh_stack* instructions use a fixed stride of 32 dwords. */
|
||||
if (radv_use_bvh_stack_rtn(pdev))
|
||||
if (radv_use_bvh_stack_rtn(compiler_info))
|
||||
workgroup_size = align(workgroup_size, 32);
|
||||
uint32_t shared_stack_size = workgroup_size * shared_stack_entries * 4;
|
||||
uint32_t shared_offset = align(shader->info.shared_size, 4);
|
||||
|
||||
if (shader->info.stage != MESA_SHADER_COMPUTE || glsl_type_is_array(opaque_type) ||
|
||||
shared_offset + shared_stack_size > pdev->info.lds_size_per_workgroup) {
|
||||
shared_offset + shared_stack_size > compiler_info->hw.lds_size_per_workgroup) {
|
||||
dst->stack_entries = MAX_SCRATCH_STACK_ENTRY_COUNT;
|
||||
} else {
|
||||
if (radv_use_bvh_stack_rtn(pdev)) {
|
||||
if (radv_use_bvh_stack_rtn(compiler_info)) {
|
||||
/* The hardware ds_bvh_stack_rtn address can only encode a stack base up to 8191 dwords, or 16383 dwords on
|
||||
* gfx12+.
|
||||
*/
|
||||
uint32_t num_wave32_groups = workgroup_size / 32;
|
||||
uint32_t max_group_stack_base = (num_wave32_groups - 1) * 32 * shared_stack_entries;
|
||||
uint32_t max_stack_base = (shared_offset / 4) + max_group_stack_base;
|
||||
uint32_t max_hw_stack_base = pdev->info.gfx_level >= GFX12 ? 16384 : 8192;
|
||||
uint32_t max_hw_stack_base = compiler_info->ac->gfx_level >= GFX12 ? 16384 : 8192;
|
||||
dst->use_bvh_stack_rtn = max_stack_base < max_hw_stack_base;
|
||||
}
|
||||
dst->shared_stack = true;
|
||||
|
|
@ -194,11 +194,11 @@ init_ray_query_vars(nir_shader *shader, const glsl_type *opaque_type, struct ray
|
|||
|
||||
static void
|
||||
lower_ray_query(nir_shader *shader, nir_variable *ray_query, struct hash_table *ht,
|
||||
const struct radv_physical_device *pdev)
|
||||
const struct radv_compiler_info *compiler_info)
|
||||
{
|
||||
struct ray_query_vars *vars = ralloc(ht, struct ray_query_vars);
|
||||
|
||||
init_ray_query_vars(shader, ray_query->type, vars, ray_query->name == NULL ? "" : ray_query->name, pdev);
|
||||
init_ray_query_vars(shader, ray_query->type, vars, ray_query->name == NULL ? "" : ray_query->name, compiler_info);
|
||||
|
||||
_mesa_hash_table_insert(ht, ray_query, vars);
|
||||
}
|
||||
|
|
@ -266,10 +266,8 @@ enum rq_intersection_type { intersection_type_none, intersection_type_triangle,
|
|||
|
||||
static void
|
||||
lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_vars *vars, nir_deref_instr *rq,
|
||||
struct radv_device *device)
|
||||
const struct radv_compiler_info *compiler_info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
nir_deref_instr *closest = rq_deref(b, rq, closest);
|
||||
nir_deref_instr *candidate = rq_deref(b, rq, candidate);
|
||||
|
||||
|
|
@ -307,7 +305,7 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
|
|||
bvh_offset = nir_if_phi(b, bvh_offset, zero);
|
||||
|
||||
nir_def *bvh_base = nir_iadd(b, accel_struct, nir_u2u64(b, bvh_offset));
|
||||
bvh_base = build_addr_to_node(device, b, bvh_base, instr->src[2].ssa);
|
||||
bvh_base = build_addr_to_node(compiler_info, b, bvh_base, instr->src[2].ssa);
|
||||
|
||||
rq_store(b, rq, root_bvh_base, bvh_base);
|
||||
rq_store(b, rq, trav_bvh_base, bvh_base);
|
||||
|
|
@ -317,8 +315,8 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
|
|||
if (vars->use_bvh_stack_rtn) {
|
||||
uint32_t workgroup_size =
|
||||
b->shader->info.workgroup_size[0] * b->shader->info.workgroup_size[1] * b->shader->info.workgroup_size[2];
|
||||
nir_def *addr =
|
||||
radv_build_bvh_stack_rtn_addr(b, stack_idx, pdev, workgroup_size, vars->shared_base, vars->stack_entries);
|
||||
nir_def *addr = radv_build_bvh_stack_rtn_addr(b, stack_idx, compiler_info, workgroup_size, vars->shared_base,
|
||||
vars->stack_entries);
|
||||
rq_store(b, rq, trav_stack, addr);
|
||||
rq_store(b, rq, trav_stack_low_watermark, addr);
|
||||
} else {
|
||||
|
|
@ -340,13 +338,14 @@ lower_rq_initialize(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query
|
|||
|
||||
rq_store(b, rq, trav_top_stack, nir_imm_int(b, -1));
|
||||
|
||||
rq_store(b, rq, incomplete, nir_iand_imm(b, accel_struct_non_null, !pdev->cache_key.no_rt));
|
||||
rq_store(b, rq, incomplete, nir_iand_imm(b, accel_struct_non_null, !compiler_info->cache_key->no_rt));
|
||||
|
||||
vars->initialize = instr;
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *instr, nir_deref_instr *rq)
|
||||
lower_rq_load(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_intrinsic_instr *instr,
|
||||
nir_deref_instr *rq)
|
||||
{
|
||||
bool committed = nir_intrinsic_committed(instr);
|
||||
|
||||
|
|
@ -369,24 +368,24 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
|
|||
case nir_ray_query_value_intersection_geometry_index:
|
||||
return nir_iand_imm(b, isec_load(b, intersection, geometry_id_and_flags), 0xFFFFFF);
|
||||
case nir_ray_query_value_intersection_instance_custom_index:
|
||||
return radv_load_custom_instance(device, b, isec_load(b, intersection, instance_addr));
|
||||
return radv_load_custom_instance(compiler_info, b, isec_load(b, intersection, instance_addr));
|
||||
case nir_ray_query_value_intersection_instance_id:
|
||||
return radv_load_instance_id(device, b, isec_load(b, intersection, instance_addr));
|
||||
return radv_load_instance_id(compiler_info, b, isec_load(b, intersection, instance_addr));
|
||||
case nir_ray_query_value_intersection_instance_sbt_index:
|
||||
return nir_iand_imm(b, isec_load(b, intersection, sbt_offset_and_flags), 0xFFFFFF);
|
||||
case nir_ray_query_value_intersection_object_ray_direction: {
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), wto_matrix);
|
||||
return nir_build_vec3_mat_mult(b, rq_load(b, rq, direction), wto_matrix, false);
|
||||
}
|
||||
case nir_ray_query_value_intersection_object_ray_origin: {
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), wto_matrix);
|
||||
return nir_build_vec3_mat_mult(b, rq_load(b, rq, origin), wto_matrix, true);
|
||||
}
|
||||
case nir_ray_query_value_intersection_object_to_world: {
|
||||
nir_def *otw_matrix[3];
|
||||
radv_load_otw_matrix(device, b, isec_load(b, intersection, instance_addr), otw_matrix);
|
||||
radv_load_otw_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), otw_matrix);
|
||||
return nir_vec3(b, nir_channel(b, otw_matrix[0], column), nir_channel(b, otw_matrix[1], column),
|
||||
nir_channel(b, otw_matrix[2], column));
|
||||
}
|
||||
|
|
@ -403,7 +402,7 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
|
|||
}
|
||||
case nir_ray_query_value_intersection_world_to_object: {
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(device, b, isec_load(b, intersection, instance_addr), wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, isec_load(b, intersection, instance_addr), wto_matrix);
|
||||
|
||||
nir_def *vals[3];
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
|
|
@ -419,7 +418,7 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
|
|||
return rq_load(b, rq, origin);
|
||||
case nir_ray_query_value_intersection_triangle_vertex_positions: {
|
||||
nir_def *primitive_addr = isec_load(b, intersection, primitive_addr);
|
||||
return radv_load_vertex_position(device, b, primitive_addr, nir_intrinsic_column(instr));
|
||||
return radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(instr));
|
||||
}
|
||||
default:
|
||||
UNREACHABLE("Invalid nir_ray_query_value!");
|
||||
|
|
@ -429,7 +428,6 @@ lower_rq_load(struct radv_device *device, nir_builder *b, nir_intrinsic_instr *i
|
|||
}
|
||||
|
||||
struct traversal_data {
|
||||
const struct radv_device *device;
|
||||
struct ray_query_vars *vars;
|
||||
nir_deref_instr *rq;
|
||||
};
|
||||
|
|
@ -510,10 +508,8 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal
|
|||
|
||||
static nir_def *
|
||||
lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_vars *vars, nir_deref_instr *rq,
|
||||
struct radv_device *device)
|
||||
const struct radv_compiler_info *compiler_info)
|
||||
{
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
nir_deref_instr *closest = rq_deref(b, rq, closest);
|
||||
nir_deref_instr *candidate = rq_deref(b, rq, candidate);
|
||||
|
||||
|
|
@ -550,7 +546,6 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
|
|||
};
|
||||
|
||||
struct traversal_data data = {
|
||||
.device = device,
|
||||
.vars = vars,
|
||||
.rq = rq,
|
||||
};
|
||||
|
|
@ -590,10 +585,10 @@ lower_rq_proceed(nir_builder *b, nir_intrinsic_instr *instr, struct ray_query_va
|
|||
nir_push_if(b, rq_load(b, rq, incomplete));
|
||||
{
|
||||
nir_def *incomplete;
|
||||
if (pdev->cache_key.bvh8)
|
||||
incomplete = radv_build_ray_traversal_gfx12(device, b, &args);
|
||||
if (compiler_info->cache_key->bvh8)
|
||||
incomplete = radv_build_ray_traversal_gfx12(compiler_info, b, &args);
|
||||
else
|
||||
incomplete = radv_build_ray_traversal(device, b, &args);
|
||||
incomplete = radv_build_ray_traversal(compiler_info, b, &args);
|
||||
rq_store(b, rq, incomplete, nir_iand(b, rq_load(b, rq, incomplete), incomplete));
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
|
@ -618,10 +613,8 @@ radv_lower_opaque_ray_query_deref(nir_builder *b, nir_deref_instr *opaque_deref,
|
|||
}
|
||||
|
||||
bool
|
||||
radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device)
|
||||
radv_nir_lower_ray_queries(struct nir_shader *shader, const struct radv_compiler_info *compiler_info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
bool progress = false;
|
||||
struct hash_table *query_ht = _mesa_pointer_hash_table_create(NULL);
|
||||
|
||||
|
|
@ -629,7 +622,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
|
|||
if (!var->data.ray_query)
|
||||
continue;
|
||||
|
||||
lower_ray_query(shader, var, query_ht, pdev);
|
||||
lower_ray_query(shader, var, query_ht, compiler_info);
|
||||
|
||||
progress = true;
|
||||
}
|
||||
|
|
@ -641,7 +634,7 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
|
|||
if (!var->data.ray_query)
|
||||
continue;
|
||||
|
||||
lower_ray_query(shader, var, query_ht, pdev);
|
||||
lower_ray_query(shader, var, query_ht, compiler_info);
|
||||
|
||||
progress = true;
|
||||
}
|
||||
|
|
@ -676,13 +669,13 @@ radv_nir_lower_ray_queries(struct nir_shader *shader, struct radv_device *device
|
|||
lower_rq_generate_intersection(&builder, intrinsic, rq);
|
||||
break;
|
||||
case nir_intrinsic_rq_initialize:
|
||||
lower_rq_initialize(&builder, intrinsic, vars, rq, device);
|
||||
lower_rq_initialize(&builder, intrinsic, vars, rq, compiler_info);
|
||||
break;
|
||||
case nir_intrinsic_rq_load:
|
||||
new_dest = lower_rq_load(device, &builder, intrinsic, rq);
|
||||
new_dest = lower_rq_load(compiler_info, &builder, intrinsic, rq);
|
||||
break;
|
||||
case nir_intrinsic_rq_proceed:
|
||||
new_dest = lower_rq_proceed(&builder, intrinsic, vars, rq, device);
|
||||
new_dest = lower_rq_proceed(&builder, intrinsic, vars, rq, compiler_info);
|
||||
break;
|
||||
case nir_intrinsic_rq_terminate:
|
||||
lower_rq_terminate(&builder, intrinsic, rq);
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
* SPDX-License-Identifier: MIT
|
||||
*/
|
||||
|
||||
#include "ac_gpu_info.h"
|
||||
#include "ac_nir.h"
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
|
@ -18,7 +17,7 @@ typedef struct {
|
|||
const struct radv_shader_args *args;
|
||||
const struct radv_shader_info *info;
|
||||
const struct radv_graphics_state_key *gfx_state;
|
||||
const struct radeon_info *gpu_info;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
} lower_vs_inputs_state;
|
||||
|
||||
static nir_def *
|
||||
|
|
@ -271,12 +270,13 @@ lower_load_vs_input(nir_builder *b, nir_intrinsic_instr *intrin, lower_vs_inputs
|
|||
const enum pipe_format attrib_format = adjust_format(s->gfx_state->vi.vertex_attribute_formats[location]);
|
||||
const struct util_format_description *f = util_format_description(attrib_format);
|
||||
const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(
|
||||
s->gpu_info->gfx_level, s->gpu_info->compiler_info.has_vtx_format_alpha_adjust_bug, attrib_format);
|
||||
s->compiler_info->ac->gfx_level, s->compiler_info->ac->has_vtx_format_alpha_adjust_bug, attrib_format);
|
||||
const unsigned binding_index = s->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
|
||||
const unsigned desc_index = util_bitcount(s->info->vs.vb_desc_usage_mask & BITFIELD_MASK(binding_index));
|
||||
|
||||
nir_def *vertex_buffers_arg = ac_nir_load_arg(b, &s->args->ac, s->args->ac.vertex_buffers);
|
||||
nir_def *vertex_buffers = nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->gpu_info->address32_hi));
|
||||
nir_def *vertex_buffers =
|
||||
nir_pack_64_2x32_split(b, vertex_buffers_arg, nir_imm_int(b, s->compiler_info->hw.address32_hi));
|
||||
nir_def *descriptor =
|
||||
ac_nir_load_smem(b, 4, vertex_buffers, nir_imm_int(b, desc_index * 16), 4, ACCESS_CAN_SPECULATE);
|
||||
nir_def *base_index = calc_vs_input_index(b, location, s);
|
||||
|
|
@ -453,8 +453,8 @@ lower_vs_input_instr(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
|||
}
|
||||
|
||||
bool
|
||||
radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_stage,
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radeon_info *gpu_info)
|
||||
radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_shader_stage *vs_stage, const struct radv_graphics_state_key *gfx_state)
|
||||
{
|
||||
assert(shader->info.stage == MESA_SHADER_VERTEX);
|
||||
|
||||
|
|
@ -462,7 +462,7 @@ radv_nir_lower_vs_inputs(nir_shader *shader, const struct radv_shader_stage *vs_
|
|||
.info = &vs_stage->info,
|
||||
.args = &vs_stage->args,
|
||||
.gfx_state = gfx_state,
|
||||
.gpu_info = gpu_info,
|
||||
.compiler_info = compiler_info,
|
||||
};
|
||||
|
||||
return nir_shader_intrinsics_pass(shader, lower_vs_input_instr, nir_metadata_control_flow, &state);
|
||||
|
|
|
|||
|
|
@ -9,18 +9,19 @@
|
|||
#include "nir_builder.h"
|
||||
#include "radv_debug.h"
|
||||
|
||||
static nir_def *build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and);
|
||||
static nir_def *build_node_to_addr(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *node, bool skip_type_and);
|
||||
|
||||
bool
|
||||
radv_use_bvh_stack_rtn(const struct radv_physical_device *pdevice)
|
||||
radv_use_bvh_stack_rtn(const struct radv_compiler_info *compiler_info)
|
||||
{
|
||||
/* gfx12 requires using the bvh4 ds_bvh_stack_rtn differently - enable hw stack instrs on gfx12 only with bvh8 */
|
||||
return ((pdevice->info.gfx_level >= GFX11 && pdevice->info.gfx_level < GFX12) || pdevice->cache_key.bvh8) &&
|
||||
!pdevice->cache_key.emulate_rt;
|
||||
return ((compiler_info->ac->gfx_level >= GFX11 && compiler_info->ac->gfx_level < GFX12) ||
|
||||
compiler_info->cache_key->bvh8) &&
|
||||
!compiler_info->cache_key->emulate_rt;
|
||||
}
|
||||
|
||||
nir_def *
|
||||
radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_physical_device *pdev, uint32_t workgroup_size,
|
||||
radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_compiler_info *compiler_info, uint32_t workgroup_size,
|
||||
uint32_t stack_base, uint32_t max_stack_entries)
|
||||
{
|
||||
assert(stack_base % 4 == 0);
|
||||
|
|
@ -41,7 +42,7 @@ radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct r
|
|||
}
|
||||
stack_idx = nir_iadd_imm(b, stack_idx, stack_base / 4);
|
||||
/* There are 4 bytes in each stack entry so no further arithmetic is needed. */
|
||||
if (pdev->info.gfx_level >= GFX12)
|
||||
if (compiler_info->ac->gfx_level >= GFX12)
|
||||
stack_idx = nir_ishl_imm(b, stack_idx, 15);
|
||||
else
|
||||
stack_idx = nir_ishl_imm(b, stack_idx, 18);
|
||||
|
|
@ -72,7 +73,7 @@ nir_sort_hit_pair(nir_builder *b, nir_variable *var_distances, nir_variable *var
|
|||
}
|
||||
|
||||
static nir_def *
|
||||
intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
|
||||
intersect_ray_amd_software_box(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
|
||||
nir_def *origin, nir_def *dir, nir_def *inv_dir)
|
||||
{
|
||||
const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
|
||||
|
|
@ -81,7 +82,7 @@ intersect_ray_amd_software_box(struct radv_device *device, nir_builder *b, nir_d
|
|||
unsigned old_math_ctrl = b->fp_math_ctrl;
|
||||
b->fp_math_ctrl |= nir_fp_exact | nir_fp_preserve_nan | nir_fp_preserve_inf;
|
||||
|
||||
nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
|
||||
nir_def *node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
|
||||
|
||||
/* vec4 distances = vec4(INF, INF, INF, INF); */
|
||||
nir_variable *distances = nir_variable_create(b->shader, nir_var_shader_temp, vec4_type, "distances");
|
||||
|
|
@ -190,7 +191,7 @@ radv_build_intersect_vertex(nir_builder *b, nir_def *v0_x, nir_def *v1_x, nir_de
|
|||
}
|
||||
|
||||
static nir_def *
|
||||
intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
|
||||
intersect_ray_amd_software_tri(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *bvh_node, nir_def *ray_tmax,
|
||||
nir_def *origin, nir_def *dir, nir_def *inv_dir)
|
||||
{
|
||||
const struct glsl_type *vec4_type = glsl_vector_type(GLSL_TYPE_FLOAT, 4);
|
||||
|
|
@ -198,7 +199,7 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_d
|
|||
unsigned old_math_ctrl = b->fp_math_ctrl;
|
||||
b->fp_math_ctrl |= nir_fp_exact | nir_fp_preserve_nan | nir_fp_preserve_inf;
|
||||
|
||||
nir_def *node_addr = build_node_to_addr(device, b, bvh_node, false);
|
||||
nir_def *node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
|
||||
|
||||
const uint32_t coord_offsets[3] = {
|
||||
offsetof(struct radv_bvh_triangle_node, coords[0]),
|
||||
|
|
@ -379,15 +380,13 @@ intersect_ray_amd_software_tri(struct radv_device *device, nir_builder *b, nir_d
|
|||
}
|
||||
|
||||
nir_def *
|
||||
build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, nir_def *flags)
|
||||
build_addr_to_node(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *addr, nir_def *flags)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
const uint64_t bvh_size = 1ull << 42;
|
||||
nir_def *node = nir_ushr_imm(b, addr, 3);
|
||||
node = nir_iand_imm(b, node, (bvh_size - 1) << 3);
|
||||
|
||||
if (pdev->cache_key.bvh8) {
|
||||
if (compiler_info->cache_key->bvh8) {
|
||||
/* The HW ray flags are the same bits as the API flags.
|
||||
* - SpvRayFlagsTerminateOnFirstHitKHRMask, SpvRayFlagsSkipClosestHitShaderKHRMask are handled in shader code.
|
||||
* - SpvRayFlagsSkipTrianglesKHRMask, SpvRayFlagsSkipAABBsKHRMask do not work.
|
||||
|
|
@ -403,14 +402,13 @@ build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, ni
|
|||
}
|
||||
|
||||
static nir_def *
|
||||
build_node_to_addr(struct radv_device *device, nir_builder *b, nir_def *node, bool skip_type_and)
|
||||
build_node_to_addr(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *node, bool skip_type_and)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
nir_def *addr = skip_type_and ? node : nir_iand_imm(b, node, ~7ull);
|
||||
addr = nir_ishl_imm(b, addr, 3);
|
||||
/* Assumes everything is in the top half of address space, which is true in
|
||||
* GFX9+ for now. */
|
||||
return pdev->info.gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
|
||||
return compiler_info->ac->gfx_level >= GFX9 ? nir_ior_imm(b, addr, 0xffffull << 48) : addr;
|
||||
}
|
||||
|
||||
nir_def *
|
||||
|
|
@ -431,11 +429,9 @@ nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool tr
|
|||
}
|
||||
|
||||
nir_def *
|
||||
radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *primitive_addr, uint32_t index)
|
||||
radv_load_vertex_position(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *primitive_addr, uint32_t index)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (pdev->cache_key.bvh8) {
|
||||
if (compiler_info->cache_key->bvh8) {
|
||||
/* Assume that vertices are uncompressed. */
|
||||
uint32_t offset = ROUND_DOWN_TO(RADV_GFX12_PRIMITIVE_NODE_HEADER_SIZE / 8, 4) + index * 3 * sizeof(float);
|
||||
nir_def *data[4];
|
||||
|
|
@ -461,12 +457,10 @@ radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *p
|
|||
}
|
||||
|
||||
void
|
||||
radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out)
|
||||
radv_load_wto_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
unsigned offset = offsetof(struct radv_bvh_instance_node, wto_matrix);
|
||||
if (pdev->cache_key.bvh8)
|
||||
if (compiler_info->cache_key->bvh8)
|
||||
offset = offsetof(struct radv_gfx12_instance_node, wto_matrix);
|
||||
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
|
|
@ -476,12 +470,10 @@ radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instan
|
|||
}
|
||||
|
||||
void
|
||||
radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out)
|
||||
radv_load_otw_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
unsigned offset = offsetof(struct radv_bvh_instance_node, otw_matrix);
|
||||
if (pdev->cache_key.bvh8)
|
||||
if (compiler_info->cache_key->bvh8)
|
||||
offset =
|
||||
sizeof(struct radv_gfx12_instance_node) + offsetof(struct radv_gfx12_instance_node_user_data, otw_matrix);
|
||||
|
||||
|
|
@ -492,11 +484,9 @@ radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instan
|
|||
}
|
||||
|
||||
nir_def *
|
||||
radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *instance_addr)
|
||||
radv_load_custom_instance(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (pdev->cache_key.bvh8) {
|
||||
if (compiler_info->cache_key->bvh8) {
|
||||
return nir_load_global(b, 1, 32,
|
||||
nir_iadd_imm(b, instance_addr,
|
||||
sizeof(struct radv_gfx12_instance_node) +
|
||||
|
|
@ -511,11 +501,9 @@ radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *i
|
|||
}
|
||||
|
||||
nir_def *
|
||||
radv_load_instance_id(struct radv_device *device, nir_builder *b, nir_def *instance_addr)
|
||||
radv_load_instance_id(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (pdev->cache_key.bvh8) {
|
||||
if (compiler_info->cache_key->bvh8) {
|
||||
return nir_load_global(b, 1, 32,
|
||||
nir_iadd_imm(b, instance_addr,
|
||||
sizeof(struct radv_gfx12_instance_node) +
|
||||
|
|
@ -540,14 +528,14 @@ hit_is_opaque(nir_builder *b, nir_def *sbt_offset_and_flags, const struct radv_r
|
|||
}
|
||||
|
||||
static nir_def *
|
||||
create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, struct radv_ray_flags *ray_flags)
|
||||
create_bvh_descriptor(nir_builder *b, const struct radv_compiler_info *compiler_info, struct radv_ray_flags *ray_flags)
|
||||
{
|
||||
/* We create a BVH descriptor that covers the entire memory range. That way we can always
|
||||
* use the same descriptor, which avoids divergence when different rays hit different
|
||||
* instances at the cost of having to use 64-bit node ids. */
|
||||
const uint64_t bvh_size = 1ull << 42;
|
||||
|
||||
const uint32_t sort_triangles_first = pdev->cache_key.bvh8 ? BITFIELD_BIT(52 - 32) : 0;
|
||||
const uint32_t sort_triangles_first = compiler_info->cache_key->bvh8 ? BITFIELD_BIT(52 - 32) : 0;
|
||||
const uint32_t box_sort_enable = BITFIELD_BIT(63 - 32);
|
||||
const uint32_t triangle_return_mode = BITFIELD_BIT(120 - 96); /* Return IJ for triangles */
|
||||
|
||||
|
|
@ -556,7 +544,7 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s
|
|||
uint32_t dword2 = (bvh_size - 1) & 0xFFFFFFFFu;
|
||||
uint32_t dword3 = ((bvh_size - 1) >> 32) | triangle_return_mode | (1u << 31);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
if (compiler_info->ac->gfx_level >= GFX11) {
|
||||
/* Enable pointer flags on GFX11+ */
|
||||
dword3 |= BITFIELD_BIT(119 - 96);
|
||||
|
||||
|
|
@ -570,7 +558,7 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s
|
|||
nir_imm_int(b, (box_sort_largest << 21) | sort_triangles_first | box_sort_enable), dword1);
|
||||
}
|
||||
|
||||
if (pdev->cache_key.bvh8) {
|
||||
if (compiler_info->cache_key->bvh8) {
|
||||
/* compressed_format_en */
|
||||
dword3 |= BITFIELD_BIT(115 - 96);
|
||||
/* wide_sort_en */
|
||||
|
|
@ -584,10 +572,9 @@ create_bvh_descriptor(nir_builder *b, const struct radv_physical_device *pdev, s
|
|||
}
|
||||
|
||||
static void
|
||||
insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
|
||||
insert_traversal_triangle_case(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args,
|
||||
const struct radv_ray_flags *ray_flags, nir_def *result, nir_def *bvh_node)
|
||||
{
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
if (!args->triangle_cb)
|
||||
return;
|
||||
|
||||
|
|
@ -602,7 +589,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
|
|||
{
|
||||
intersection.frontface = nir_fgt_imm(b, div, 0);
|
||||
nir_def *not_cull;
|
||||
if (pdev->info.gfx_level < GFX11 || pdev->cache_key.emulate_rt) {
|
||||
if (compiler_info->ac->gfx_level < GFX11 || compiler_info->cache_key->emulate_rt) {
|
||||
nir_def *switch_ccw =
|
||||
nir_test_mask(b, nir_load_deref(b, args->vars.sbt_offset_and_flags), RADV_INSTANCE_TRIANGLE_FLIP_FACING);
|
||||
intersection.frontface = nir_ixor(b, intersection.frontface, switch_ccw);
|
||||
|
|
@ -623,7 +610,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
|
|||
|
||||
nir_flt(b, args->tmin, intersection.t), not_cull));
|
||||
{
|
||||
intersection.base.node_addr = build_node_to_addr(device, b, bvh_node, false);
|
||||
intersection.base.node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
|
||||
nir_def *triangle_info = nir_load_global(
|
||||
b, 2, 32,
|
||||
nir_iadd_imm(b, intersection.base.node_addr, offsetof(struct radv_bvh_triangle_node, triangle_id)));
|
||||
|
|
@ -648,7 +635,7 @@ insert_traversal_triangle_case(struct radv_device *device, nir_builder *b, const
|
|||
}
|
||||
|
||||
static void
|
||||
insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b,
|
||||
insert_traversal_triangle_case_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b,
|
||||
const struct radv_ray_traversal_args *args, const struct radv_ray_flags *ray_flags,
|
||||
nir_variable *intrinsic_result, nir_def *result, nir_def *global_bvh_node,
|
||||
nir_def *bvh_node)
|
||||
|
|
@ -694,7 +681,7 @@ insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b,
|
|||
nir_def *dword3 = nir_bcsel(b, triangle0, nir_channel(b, result, 3), nir_channel(b, result, 7));
|
||||
|
||||
intersection.frontface = nir_inot(b, nir_test_mask(b, dword3, 1));
|
||||
intersection.base.node_addr = build_node_to_addr(device, b, global_bvh_node, false);
|
||||
intersection.base.node_addr = build_node_to_addr(compiler_info, b, global_bvh_node, false);
|
||||
intersection.base.primitive_id = nir_ishr_imm(b, dword3, 1);
|
||||
intersection.base.geometry_id_and_flags =
|
||||
nir_ishr_imm(b, nir_bcsel(b, triangle0, nir_channel(b, result, 8), nir_channel(b, result, 9)), 2);
|
||||
|
|
@ -711,7 +698,7 @@ insert_traversal_triangle_case_gfx12(struct radv_device *device, nir_builder *b,
|
|||
}
|
||||
|
||||
static void
|
||||
insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
|
||||
insert_traversal_aabb_case(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args,
|
||||
const struct radv_ray_flags *ray_flags, nir_def *bvh_node)
|
||||
{
|
||||
if (!args->aabb_cb)
|
||||
|
|
@ -720,7 +707,7 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const str
|
|||
nir_push_if(b, ray_flags->no_skip_aabbs);
|
||||
{
|
||||
struct radv_leaf_intersection intersection;
|
||||
intersection.node_addr = build_node_to_addr(device, b, bvh_node, false);
|
||||
intersection.node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
|
||||
nir_def *triangle_info = nir_load_global(
|
||||
b, 2, 32, nir_iadd_imm(b, intersection.node_addr, offsetof(struct radv_bvh_aabb_node, primitive_id)));
|
||||
intersection.primitive_id = nir_channel(b, triangle_info, 0);
|
||||
|
|
@ -738,14 +725,14 @@ insert_traversal_aabb_case(struct radv_device *device, nir_builder *b, const str
|
|||
}
|
||||
|
||||
static void
|
||||
insert_traversal_aabb_case_gfx12(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args,
|
||||
insert_traversal_aabb_case_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args,
|
||||
const struct radv_ray_flags *ray_flags, nir_def *result, nir_def *bvh_node)
|
||||
{
|
||||
if (!args->aabb_cb)
|
||||
return;
|
||||
|
||||
struct radv_leaf_intersection intersection;
|
||||
intersection.node_addr = build_node_to_addr(device, b, bvh_node, false);
|
||||
intersection.node_addr = build_node_to_addr(compiler_info, b, bvh_node, false);
|
||||
intersection.primitive_id = nir_ishr_imm(b, nir_channel(b, result, 3), 1);
|
||||
intersection.geometry_id_and_flags = nir_ishr_imm(b, nir_channel(b, result, 8), 2);
|
||||
intersection.opaque = nir_inot(b, nir_test_mask(b, nir_channel(b, result, 2), 1u << 31));
|
||||
|
|
@ -758,10 +745,9 @@ insert_traversal_aabb_case_gfx12(struct radv_device *device, nir_builder *b, con
|
|||
}
|
||||
|
||||
static nir_def *
|
||||
fetch_parent_node(struct radv_device *device, nir_builder *b, nir_def *bvh, nir_def *node)
|
||||
fetch_parent_node(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *bvh, nir_def *node)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, pdev->cache_key.bvh8 ? 16 : 8), 4), 4);
|
||||
nir_def *offset = nir_iadd_imm(b, nir_imul_imm(b, nir_udiv_imm(b, node, compiler_info->cache_key->bvh8 ? 16 : 8), 4), 4);
|
||||
return nir_load_global(b, 1, 32, nir_isub(b, bvh, nir_u2u64(b, offset)), .align_mul = 4);
|
||||
}
|
||||
|
||||
|
|
@ -780,10 +766,10 @@ radv_test_flag(nir_builder *b, const struct radv_ray_traversal_args *args, uint3
|
|||
}
|
||||
|
||||
static nir_def *
|
||||
build_bvh_base(nir_builder *b, const struct radv_physical_device *pdev, nir_def *base_addr, nir_def *ptr_flags,
|
||||
build_bvh_base(nir_builder *b, const struct radv_compiler_info *compiler_info, nir_def *base_addr, nir_def *ptr_flags,
|
||||
bool overwrite)
|
||||
{
|
||||
if (pdev->info.gfx_level < GFX11 || pdev->cache_key.emulate_rt)
|
||||
if (compiler_info->ac->gfx_level < GFX11 || compiler_info->cache_key->emulate_rt)
|
||||
return base_addr;
|
||||
|
||||
nir_def *base_addr_vec = nir_unpack_64_2x32(b, base_addr);
|
||||
|
|
@ -796,7 +782,7 @@ build_bvh_base(nir_builder *b, const struct radv_physical_device *pdev, nir_def
|
|||
}
|
||||
|
||||
static void
|
||||
build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, const struct radv_ray_traversal_args *args,
|
||||
build_instance_exit(nir_builder *b, const struct radv_compiler_info *compiler_info, const struct radv_ray_traversal_args *args,
|
||||
nir_def *stack_instance_exit, nir_def *ptr_flags)
|
||||
{
|
||||
nir_def *root_instance_exit = nir_iand(
|
||||
|
|
@ -805,7 +791,7 @@ build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, con
|
|||
nir_if *instance_exit = nir_push_if(b, nir_ior(b, stack_instance_exit, root_instance_exit));
|
||||
instance_exit->control = nir_selection_control_dont_flatten;
|
||||
{
|
||||
if (pdev->cache_key.bvh8 && args->use_bvh_stack_rtn)
|
||||
if (compiler_info->cache_key->bvh8 && args->use_bvh_stack_rtn)
|
||||
nir_store_deref(b, args->vars.stack,
|
||||
nir_ior_imm(b, nir_load_deref(b, args->vars.stack), RADV_BVH_STACK_FLAG_TLAS_POP), 0x1);
|
||||
else
|
||||
|
|
@ -814,7 +800,7 @@ build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, con
|
|||
nir_store_deref(b, args->vars.instance_bottom_node, nir_imm_int(b, RADV_BVH_NO_INSTANCE_ROOT), 1);
|
||||
|
||||
nir_def *root_bvh_base =
|
||||
pdev->cache_key.bvh8 ? args->root_bvh_base : build_bvh_base(b, pdev, args->root_bvh_base, ptr_flags, true);
|
||||
compiler_info->cache_key->bvh8 ? args->root_bvh_base : build_bvh_base(b, compiler_info, args->root_bvh_base, ptr_flags, true);
|
||||
|
||||
nir_store_deref(b, args->vars.bvh_base, root_bvh_base, 0x1);
|
||||
nir_store_deref(b, args->vars.origin, args->origin, 7);
|
||||
|
|
@ -825,9 +811,8 @@ build_instance_exit(nir_builder *b, const struct radv_physical_device *pdev, con
|
|||
}
|
||||
|
||||
nir_def *
|
||||
radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
|
||||
radv_build_ray_traversal(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
|
||||
nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
|
||||
nir_variable *intrinsic_result = nir_local_variable_create(b->impl, glsl_uvec4_type(), "intrinsic_result");
|
||||
|
|
@ -849,9 +834,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
nir_iand_imm(b, args->flags, ~(SpvRayFlagsTerminateOnFirstHitKHRMask | SpvRayFlagsSkipClosestHitShaderKHRMask));
|
||||
|
||||
nir_store_deref(b, args->vars.bvh_base,
|
||||
build_bvh_base(b, pdev, nir_load_deref(b, args->vars.bvh_base), ptr_flags, true), 0x1);
|
||||
build_bvh_base(b, compiler_info, nir_load_deref(b, args->vars.bvh_base), ptr_flags, true), 0x1);
|
||||
|
||||
nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags);
|
||||
nir_def *desc = create_bvh_descriptor(b, compiler_info, &ray_flags);
|
||||
nir_def *vec3ones = nir_imm_vec3(b, 1.0, 1.0, 1.0);
|
||||
|
||||
nir_loop *loop = nir_push_loop(b);
|
||||
|
|
@ -868,7 +853,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
nir_jump(b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
build_instance_exit(b, pdev, args,
|
||||
build_instance_exit(b, compiler_info, args,
|
||||
nir_ilt(b, nir_load_deref(b, args->vars.stack), nir_load_deref(b, args->vars.top_stack)),
|
||||
ptr_flags);
|
||||
}
|
||||
|
|
@ -885,7 +870,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
}
|
||||
nir_pop_if(b, NULL);
|
||||
build_instance_exit(
|
||||
b, pdev, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)),
|
||||
b, compiler_info, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)),
|
||||
ptr_flags);
|
||||
}
|
||||
|
||||
|
|
@ -901,9 +886,9 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
if (args->use_bvh_stack_rtn)
|
||||
nir_store_deref(b, args->vars.stack, nir_iadd_imm(b, nir_load_deref(b, args->vars.stack), 1), 0x1);
|
||||
nir_def *prev = nir_load_deref(b, args->vars.previous_node);
|
||||
nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
|
||||
nir_def *bvh_addr = build_node_to_addr(compiler_info, b, nir_load_deref(b, args->vars.bvh_base), true);
|
||||
|
||||
nir_def *parent = fetch_parent_node(device, b, bvh_addr, prev);
|
||||
nir_def *parent = fetch_parent_node(compiler_info, b, bvh_addr, prev);
|
||||
nir_push_if(b, nir_ieq_imm(b, parent, RADV_BVH_INVALID_NODE));
|
||||
{
|
||||
nir_store_var(b, incomplete, nir_imm_false(b), 0x1);
|
||||
|
|
@ -945,7 +930,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
nir_def *global_bvh_node = nir_iadd(b, nir_load_deref(b, args->vars.bvh_base), nir_u2u64(b, bvh_node));
|
||||
|
||||
bool has_result = false;
|
||||
if (pdev->info.compiler_info.has_image_bvh_intersect_ray && !pdev->cache_key.emulate_rt) {
|
||||
if (compiler_info->ac->has_image_bvh_intersect_ray && !compiler_info->cache_key->emulate_rt) {
|
||||
nir_store_var(
|
||||
b, intrinsic_result,
|
||||
nir_bvh64_intersect_ray_amd(b, 32, desc, nir_unpack_64_2x32(b, global_bvh_node),
|
||||
|
|
@ -961,7 +946,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
{
|
||||
nir_push_if(b, nir_test_mask(b, bvh_node, BITFIELD64_BIT(ffs(radv_bvh_node_aabb) - 1)));
|
||||
{
|
||||
insert_traversal_aabb_case(device, b, args, &ray_flags, global_bvh_node);
|
||||
insert_traversal_aabb_case(compiler_info, b, args, &ray_flags, global_bvh_node);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
|
|
@ -972,14 +957,14 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
}
|
||||
|
||||
/* instance */
|
||||
nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
|
||||
nir_def *instance_node_addr = build_node_to_addr(compiler_info, b, global_bvh_node, false);
|
||||
nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
|
||||
|
||||
nir_def *instance_data =
|
||||
nir_load_global(b, 4, 32, instance_node_addr, .align_mul = 64, .align_offset = 0);
|
||||
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(device, b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
|
||||
|
||||
nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, instance_data, 3), 1);
|
||||
|
||||
|
|
@ -1006,7 +991,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
nir_def *instance_pointer = nir_pack_64_2x32(b, nir_trim_vector(b, instance_data, 2));
|
||||
instance_pointer = nir_iand(b, instance_pointer, instance_flag_mask);
|
||||
|
||||
nir_store_deref(b, args->vars.bvh_base, build_bvh_base(b, pdev, instance_pointer, ptr_flags, false),
|
||||
nir_store_deref(b, args->vars.bvh_base, build_bvh_base(b, compiler_info, instance_pointer, ptr_flags, false),
|
||||
0x1);
|
||||
|
||||
/* Push the instance root node onto the stack */
|
||||
|
|
@ -1040,7 +1025,7 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
/* If we didn't run the intrinsic cause the hardware didn't support it,
|
||||
* emulate ray/box intersection here */
|
||||
result = intersect_ray_amd_software_box(
|
||||
device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
|
||||
compiler_info, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
|
||||
nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));
|
||||
}
|
||||
|
||||
|
|
@ -1097,10 +1082,10 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
/* If we didn't run the intrinsic cause the hardware didn't support it,
|
||||
* emulate ray/tri intersection here */
|
||||
result = intersect_ray_amd_software_tri(
|
||||
device, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
|
||||
compiler_info, b, global_bvh_node, nir_load_deref(b, args->vars.tmax), nir_load_deref(b, args->vars.origin),
|
||||
nir_load_deref(b, args->vars.dir), nir_load_deref(b, args->vars.inv_dir));
|
||||
}
|
||||
insert_traversal_triangle_case(device, b, args, &ray_flags, result, global_bvh_node);
|
||||
insert_traversal_triangle_case(compiler_info, b, args, &ray_flags, result, global_bvh_node);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
|
||||
|
|
@ -1131,10 +1116,8 @@ radv_build_ray_traversal(struct radv_device *device, nir_builder *b, const struc
|
|||
}
|
||||
|
||||
nir_def *
|
||||
radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const struct radv_ray_traversal_args *args)
|
||||
radv_build_ray_traversal_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b, const struct radv_ray_traversal_args *args)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
nir_variable *incomplete = nir_local_variable_create(b->impl, glsl_bool_type(), "incomplete");
|
||||
nir_store_var(b, incomplete, nir_imm_true(b), 0x1);
|
||||
nir_variable *intrinsic_result = nir_local_variable_create(b->impl, glsl_uvec_type(8), "intrinsic_result");
|
||||
|
|
@ -1152,7 +1135,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
|
|||
.no_skip_aabbs = radv_test_flag(b, args, SpvRayFlagsSkipAABBsKHRMask, false),
|
||||
};
|
||||
|
||||
nir_def *desc = create_bvh_descriptor(b, pdev, &ray_flags);
|
||||
nir_def *desc = create_bvh_descriptor(b, compiler_info, &ray_flags);
|
||||
|
||||
nir_loop *loop = nir_push_loop(b);
|
||||
{
|
||||
|
|
@ -1168,7 +1151,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
|
|||
nir_jump(b, nir_jump_break);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
build_instance_exit(b, pdev, args,
|
||||
build_instance_exit(b, compiler_info, args,
|
||||
nir_test_mask(b, nir_load_deref(b, args->vars.stack), RADV_BVH_STACK_FLAG_TLAS_POP), NULL);
|
||||
}
|
||||
|
||||
|
|
@ -1184,7 +1167,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
|
|||
}
|
||||
nir_pop_if(b, NULL);
|
||||
build_instance_exit(
|
||||
b, pdev, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)),
|
||||
b, compiler_info, args, nir_ige(b, nir_load_deref(b, args->vars.top_stack), nir_load_deref(b, args->vars.stack)),
|
||||
NULL);
|
||||
}
|
||||
|
||||
|
|
@ -1210,7 +1193,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
|
|||
nir_imm_int(b,
|
||||
(int32_t)offsetof(struct radv_gfx12_box_node, parent_id) - (radv_bvh_node_box32 << 3)));
|
||||
nir_def *offset = nir_iadd(b, nir_ishl_imm(b, prev, 3), field_offset);
|
||||
nir_def *bvh_addr = build_node_to_addr(device, b, nir_load_deref(b, args->vars.bvh_base), true);
|
||||
nir_def *bvh_addr = build_node_to_addr(compiler_info, b, nir_load_deref(b, args->vars.bvh_base), true);
|
||||
loaded_parent_id = nir_load_global(b, 1, 32, nir_iadd(b, bvh_addr, nir_u2u64(b, offset)));
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
|
|
@ -1294,7 +1277,7 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
|
|||
nir_push_else(b, NULL);
|
||||
{
|
||||
/* instance */
|
||||
nir_def *instance_node_addr = build_node_to_addr(device, b, global_bvh_node, false);
|
||||
nir_def *instance_node_addr = build_node_to_addr(compiler_info, b, global_bvh_node, false);
|
||||
nir_store_deref(b, args->vars.instance_addr, instance_node_addr, 1);
|
||||
|
||||
nir_store_deref(b, args->vars.sbt_offset_and_flags, nir_channel(b, result, 6), 1);
|
||||
|
|
@ -1377,13 +1360,13 @@ radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b, const
|
|||
nir_push_if(b, nir_test_mask(b, nir_channel(b, result, 1), 1u << 31));
|
||||
{
|
||||
nir_push_if(b, ray_flags.no_skip_aabbs);
|
||||
insert_traversal_aabb_case_gfx12(device, b, args, &ray_flags, result, global_bvh_node);
|
||||
insert_traversal_aabb_case_gfx12(compiler_info, b, args, &ray_flags, result, global_bvh_node);
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_push_if(b, ray_flags.no_skip_triangles);
|
||||
insert_traversal_triangle_case_gfx12(device, b, args, &ray_flags, intrinsic_result, result, global_bvh_node,
|
||||
insert_traversal_triangle_case_gfx12(compiler_info, b, args, &ray_flags, intrinsic_result, result, global_bvh_node,
|
||||
bvh_node);
|
||||
nir_pop_if(b, NULL);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,27 +12,26 @@
|
|||
|
||||
#include "compiler/spirv/spirv.h"
|
||||
|
||||
struct radv_device;
|
||||
struct radv_physical_device;
|
||||
struct radv_compiler_info;
|
||||
|
||||
bool radv_use_bvh_stack_rtn(const struct radv_physical_device *pdevice);
|
||||
bool radv_use_bvh_stack_rtn(const struct radv_compiler_info *compiler_info);
|
||||
|
||||
nir_def *radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_physical_device *pdev,
|
||||
nir_def *radv_build_bvh_stack_rtn_addr(nir_builder *b, nir_def *stack_idx, const struct radv_compiler_info *compiler_info,
|
||||
uint32_t workgroup_size, uint32_t stack_base, uint32_t max_stack_entries);
|
||||
|
||||
nir_def *build_addr_to_node(struct radv_device *device, nir_builder *b, nir_def *addr, nir_def *flags);
|
||||
nir_def *build_addr_to_node(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *addr, nir_def *flags);
|
||||
|
||||
nir_def *nir_build_vec3_mat_mult(nir_builder *b, nir_def *vec, nir_def *matrix[], bool translation);
|
||||
|
||||
nir_def *radv_load_vertex_position(struct radv_device *device, nir_builder *b, nir_def *primitive_addr, uint32_t index);
|
||||
nir_def *radv_load_vertex_position(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *primitive_addr, uint32_t index);
|
||||
|
||||
void radv_load_wto_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out);
|
||||
void radv_load_wto_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out);
|
||||
|
||||
void radv_load_otw_matrix(struct radv_device *device, nir_builder *b, nir_def *instance_addr, nir_def **out);
|
||||
void radv_load_otw_matrix(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr, nir_def **out);
|
||||
|
||||
nir_def *radv_load_custom_instance(struct radv_device *device, nir_builder *b, nir_def *instance_addr);
|
||||
nir_def *radv_load_custom_instance(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr);
|
||||
|
||||
nir_def *radv_load_instance_id(struct radv_device *device, nir_builder *b, nir_def *instance_addr);
|
||||
nir_def *radv_load_instance_id(const struct radv_compiler_info *compiler_info, nir_builder *b, nir_def *instance_addr);
|
||||
|
||||
struct radv_ray_traversal_args;
|
||||
|
||||
|
|
@ -162,10 +161,10 @@ struct radv_ray_traversal_args {
|
|||
* rayQueryProceedEXT. Traversal will only be considered incomplete, if one of the specified
|
||||
* callbacks breaks out of the traversal loop.
|
||||
*/
|
||||
nir_def *radv_build_ray_traversal(struct radv_device *device, nir_builder *b,
|
||||
nir_def *radv_build_ray_traversal(const struct radv_compiler_info *compiler_info, nir_builder *b,
|
||||
const struct radv_ray_traversal_args *args);
|
||||
|
||||
nir_def *radv_build_ray_traversal_gfx12(struct radv_device *device, nir_builder *b,
|
||||
nir_def *radv_build_ray_traversal_gfx12(const struct radv_compiler_info *compiler_info, nir_builder *b,
|
||||
const struct radv_ray_traversal_args *args);
|
||||
|
||||
#endif /* RADV_NIR_RT_COMMON_H */
|
||||
|
|
|
|||
|
|
@ -257,11 +257,9 @@ radv_nir_return_param_from_type(nir_parameter *param, const glsl_type *type, boo
|
|||
}
|
||||
|
||||
void
|
||||
radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage, bool uses_descriptor_heap,
|
||||
struct radv_shader_debug_info *debug)
|
||||
radv_build_rt_prolog(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage,
|
||||
bool uses_descriptor_heap, struct radv_shader_debug_info *debug)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
nir_builder b = radv_meta_nir_init_shader(MESA_SHADER_COMPUTE, "rt_prolog");
|
||||
stage->stage = MESA_SHADER_COMPUTE;
|
||||
stage->nir = b.shader;
|
||||
|
|
@ -270,24 +268,25 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
|
|||
stage->info.loads_dynamic_offsets = true;
|
||||
stage->info.force_indirect_descriptors = true;
|
||||
stage->info.descriptor_heap = uses_descriptor_heap;
|
||||
stage->info.wave_size = pdev->rt_wave_size;
|
||||
stage->info.wave_size = compiler_info->rt_wave_size;
|
||||
stage->info.workgroup_size = stage->info.wave_size;
|
||||
stage->info.user_data_0 = R_00B900_COMPUTE_USER_DATA_0;
|
||||
stage->info.type = RADV_SHADER_TYPE_RT_PROLOG;
|
||||
stage->info.cs.block_size[0] = pdev->rt_wave_size;
|
||||
stage->info.cs.block_size[0] = compiler_info->rt_wave_size;
|
||||
stage->info.cs.block_size[1] = 1;
|
||||
stage->info.cs.block_size[2] = 1;
|
||||
stage->info.cs.uses_thread_id[0] = true;
|
||||
for (unsigned i = 0; i < 3; i++)
|
||||
stage->info.cs.uses_block_id[i] = true;
|
||||
|
||||
radv_declare_shader_args(device, NULL, &stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &stage->args, debug);
|
||||
radv_declare_shader_args(compiler_info, NULL, &stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &stage->args,
|
||||
debug);
|
||||
stage->info.user_sgprs_locs = stage->args.user_sgprs_locs;
|
||||
|
||||
b.shader->info.workgroup_size[0] = pdev->rt_wave_size;
|
||||
b.shader->info.api_subgroup_size = pdev->rt_wave_size;
|
||||
b.shader->info.max_subgroup_size = pdev->rt_wave_size;
|
||||
b.shader->info.min_subgroup_size = pdev->rt_wave_size;
|
||||
b.shader->info.workgroup_size[0] = compiler_info->rt_wave_size;
|
||||
b.shader->info.api_subgroup_size = compiler_info->rt_wave_size;
|
||||
b.shader->info.max_subgroup_size = compiler_info->rt_wave_size;
|
||||
b.shader->info.min_subgroup_size = compiler_info->rt_wave_size;
|
||||
|
||||
nir_function *raygen_function = nir_function_create(b.shader, "raygen_func");
|
||||
radv_nir_init_rt_function_params(raygen_function, MESA_SHADER_RAYGEN, 0, 0, uses_descriptor_heap);
|
||||
|
|
@ -306,7 +305,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
|
|||
nir_def *launch_size_addr = nir_pack_64_2x32(&b, ac_nir_load_arg(&b, &stage->args.ac, stage->args.ac.rt.launch_size_addr));
|
||||
nir_def *traversal_addr =
|
||||
nir_pack_64_2x32_split(&b, ac_nir_load_arg(&b, &stage->args.ac, stage->args.ac.rt.traversal_shader_addr),
|
||||
nir_imm_int(&b, pdev->info.address32_hi));
|
||||
nir_imm_int(&b, compiler_info->hw.address32_hi));
|
||||
|
||||
nir_def *raygen_sbt = nir_pack_64_2x32(&b, ac_nir_load_smem(&b, 2, sbt_desc, nir_imm_int(&b, 0), 4, 0));
|
||||
nir_def *launch_sizes = ac_nir_load_smem(&b, 3, launch_size_addr, nir_imm_int(&b, 0), 4, 0);
|
||||
|
|
@ -320,7 +319,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
|
|||
|
||||
nir_def *local_id = nir_channel(&b, nir_load_local_invocation_id(&b), 0);
|
||||
|
||||
nir_def *unswizzled_id_x = nir_iadd(&b, nir_imul_imm(&b, wg_ids[0], pdev->rt_wave_size), local_id);
|
||||
nir_def *unswizzled_id_x = nir_iadd(&b, nir_imul_imm(&b, wg_ids[0], compiler_info->rt_wave_size), local_id);
|
||||
nir_def *unswizzled_id_y = wg_ids[1];
|
||||
|
||||
/* Swizzle ray launch IDs. We dispatch a 1D 32x1/64x1 workgroup natively. Many games dispatch
|
||||
|
|
@ -366,7 +365,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
|
|||
swizzled_id_y = nir_bitfield_select(&b, nir_imm_int(&b, 0x3), swizzled_id_y, swizzled_id_shifted_y);
|
||||
|
||||
uint32_t workgroup_width = 8;
|
||||
uint32_t workgroup_height = pdev->rt_wave_size == 32 ? 4 : 8;
|
||||
uint32_t workgroup_height = compiler_info->rt_wave_size == 32 ? 4 : 8;
|
||||
uint32_t workgroup_height_mask = workgroup_height - 1;
|
||||
|
||||
/* Fix up the workgroup IDs after converting from 32x1/64x1 to 8x4/8x8. The X dimension of the
|
||||
|
|
@ -376,7 +375,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
|
|||
* the fact we divided the X component of the ID.
|
||||
*/
|
||||
nir_def *wg_id_y_rem = nir_iand_imm(&b, wg_ids[1], workgroup_height_mask);
|
||||
nir_def *new_wg_start_x = nir_imul_imm(&b, wg_ids[0], pdev->rt_wave_size);
|
||||
nir_def *new_wg_start_x = nir_imul_imm(&b, wg_ids[0], compiler_info->rt_wave_size);
|
||||
new_wg_start_x = nir_iadd(&b, new_wg_start_x, nir_imul_imm(&b, wg_id_y_rem, workgroup_width));
|
||||
|
||||
nir_def *new_wg_start_y = nir_iand_imm(&b, wg_ids[1], ~workgroup_height_mask);
|
||||
|
|
@ -393,7 +392,7 @@ radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage
|
|||
/* If parts of this wave would've exceeded the launch size in the X dimension, their threads will be masked out and
|
||||
* exec won't equal -1. In that case, using swizzled IDs is invalid.
|
||||
*/
|
||||
nir_def *partial_oob_x = nir_ine_imm(&b, nir_ballot(&b, 1, pdev->rt_wave_size, nir_imm_true(&b)), -1);
|
||||
nir_def *partial_oob_x = nir_ine_imm(&b, nir_ballot(&b, 1, compiler_info->rt_wave_size, nir_imm_true(&b)), -1);
|
||||
nir_def *partial_oob_y = nir_uge(&b, wg_ids[1], y_wg_bound);
|
||||
|
||||
nir_def *partial_oob = nir_ior(&b, partial_oob_x, partial_oob_y);
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ typedef struct glsl_type glsl_type;
|
|||
*/
|
||||
|
||||
struct radv_rt_case_data {
|
||||
struct radv_device *device;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
struct radv_ray_tracing_pipeline *pipeline;
|
||||
void *param_data;
|
||||
};
|
||||
|
|
@ -154,12 +154,12 @@ struct radv_nir_rt_traversal_result {
|
|||
nir_variable *barycentrics;
|
||||
};
|
||||
|
||||
struct radv_nir_rt_traversal_result radv_build_traversal(struct radv_device *device,
|
||||
struct radv_nir_rt_traversal_result radv_build_traversal(const struct radv_compiler_info *compiler_info,
|
||||
struct radv_ray_tracing_pipeline *pipeline, nir_builder *b,
|
||||
struct radv_nir_rt_traversal_params *params,
|
||||
struct radv_ray_tracing_stage_info *info);
|
||||
|
||||
void radv_build_rt_prolog(struct radv_device *device, struct radv_shader_stage *stage, bool uses_descriptor_heap,
|
||||
struct radv_shader_debug_info *debug);
|
||||
void radv_build_rt_prolog(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage,
|
||||
bool uses_descriptor_heap, struct radv_shader_debug_info *debug);
|
||||
|
||||
#endif // MESA_RADV_NIR_RT_STAGE_COMMON_H
|
||||
|
|
|
|||
|
|
@ -15,9 +15,7 @@
|
|||
|
||||
#include "ac_nir.h"
|
||||
#include "aco_nir_call_attribs.h"
|
||||
#include "radv_device.h"
|
||||
#include "radv_nir_rt_stage_functions.h"
|
||||
#include "radv_physical_device.h"
|
||||
#include "radv_pipeline_rt.h"
|
||||
#include "radv_shader.h"
|
||||
|
||||
|
|
@ -69,7 +67,7 @@ radv_gather_unused_args(struct radv_ray_tracing_stage_info *info, nir_shader *ni
|
|||
* Global variables for an RT pipeline
|
||||
*/
|
||||
struct rt_variables {
|
||||
struct radv_device *device;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
const VkPipelineCreateFlags2 flags;
|
||||
|
||||
nir_variable *shader_addr;
|
||||
|
|
@ -107,10 +105,11 @@ struct rt_variables {
|
|||
};
|
||||
|
||||
static struct rt_variables
|
||||
create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipelineCreateFlags2 flags)
|
||||
create_rt_variables(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
const VkPipelineCreateFlags2 flags)
|
||||
{
|
||||
struct rt_variables vars = {
|
||||
.device = device,
|
||||
.compiler_info = compiler_info,
|
||||
.flags = flags,
|
||||
};
|
||||
vars.shader_addr = nir_variable_create(shader, nir_var_shader_temp, glsl_uint64_t_type(), "shader_addr");
|
||||
|
|
@ -184,6 +183,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
|
||||
struct radv_lower_rt_instruction_data *data = _data;
|
||||
struct rt_variables *vars = data->vars;
|
||||
const struct radv_compiler_info *compiler_info = vars->compiler_info;
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
|
|
@ -297,7 +297,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_instance_custom_index: {
|
||||
ret = radv_load_custom_instance(vars->device, b, nir_load_var(b, vars->instance_addr));
|
||||
ret = radv_load_custom_instance(compiler_info, b, nir_load_var(b, vars->instance_addr));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_primitive_id: {
|
||||
|
|
@ -310,7 +310,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_load_instance_id: {
|
||||
ret = radv_load_instance_id(vars->device, b, nir_load_var(b, vars->instance_addr));
|
||||
ret = radv_load_instance_id(compiler_info, b, nir_load_var(b, vars->instance_addr));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_flags: {
|
||||
|
|
@ -325,7 +325,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
|
||||
|
||||
nir_def *vals[3];
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
|
|
@ -337,20 +337,20 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
case nir_intrinsic_load_ray_object_to_world: {
|
||||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *otw_matrix[3];
|
||||
radv_load_otw_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), otw_matrix);
|
||||
radv_load_otw_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), otw_matrix);
|
||||
ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c),
|
||||
nir_channel(b, otw_matrix[2], c));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_object_origin: {
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->origin), wto_matrix, true);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_object_direction: {
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->direction), wto_matrix, false);
|
||||
break;
|
||||
}
|
||||
|
|
@ -430,7 +430,7 @@ radv_lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_data)
|
|||
}
|
||||
case nir_intrinsic_load_ray_triangle_vertex_positions: {
|
||||
nir_def *primitive_addr = nir_load_var(b, vars->primitive_addr);
|
||||
ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr));
|
||||
ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
@ -554,7 +554,7 @@ init_cps_function(nir_function *function, bool has_position_fetch, bool uses_des
|
|||
|
||||
void
|
||||
radv_nir_lower_rt_abi_cps(nir_shader *shader, const struct radv_shader_info *info, bool resume_shader,
|
||||
struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
|
||||
const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline,
|
||||
bool has_position_fetch, const struct radv_ray_tracing_stage_info *traversal_info)
|
||||
{
|
||||
const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
|
||||
|
|
@ -574,7 +574,7 @@ radv_nir_lower_rt_abi_cps(nir_shader *shader, const struct radv_shader_info *inf
|
|||
impl->function->params[idx].driver_attributes |= ACO_NIR_PARAM_ATTRIB_DISCARDABLE;
|
||||
}
|
||||
|
||||
struct rt_variables vars = create_rt_variables(shader, device, pipeline->base.base.create_flags);
|
||||
struct rt_variables vars = create_rt_variables(shader, compiler_info, pipeline->base.base.create_flags);
|
||||
|
||||
struct radv_rt_shader_info rt_info = {0};
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,9 @@
|
|||
void radv_gather_unused_args(struct radv_ray_tracing_stage_info *info, nir_shader *nir);
|
||||
|
||||
void radv_nir_lower_rt_abi_cps(nir_shader *shader, const struct radv_shader_info *info, bool resume_shader,
|
||||
struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
|
||||
bool has_position_fetch, const struct radv_ray_tracing_stage_info *traversal_info);
|
||||
const struct radv_compiler_info *compiler_info,
|
||||
struct radv_ray_tracing_pipeline *pipeline, bool has_position_fetch,
|
||||
const struct radv_ray_tracing_stage_info *traversal_info);
|
||||
void radv_nir_lower_rt_io_cps(nir_shader *shader);
|
||||
|
||||
#endif // RADV_NIR_RT_STAGE_CPS_H
|
||||
|
|
|
|||
|
|
@ -13,8 +13,6 @@
|
|||
#include "nir/radv_nir_rt_stage_common.h"
|
||||
#include "nir/radv_nir_rt_stage_functions.h"
|
||||
|
||||
#include "radv_device.h"
|
||||
#include "radv_physical_device.h"
|
||||
#include "radv_shader.h"
|
||||
|
||||
#include "aco_nir_call_attribs.h"
|
||||
|
|
@ -172,7 +170,7 @@ radv_nir_init_rt_function_params(nir_function *function, mesa_shader_stage stage
|
|||
* Global variables for an RT pipeline
|
||||
*/
|
||||
struct rt_variables {
|
||||
struct radv_device *device;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
const VkPipelineCreateFlags2 flags;
|
||||
|
||||
/* Stage-dependent parameter indices */
|
||||
|
|
@ -214,12 +212,12 @@ struct rt_variables {
|
|||
};
|
||||
|
||||
static struct rt_variables
|
||||
create_rt_variables(nir_shader *shader, struct radv_device *device, const VkPipelineCreateFlags2 flags,
|
||||
unsigned max_payload_size, unsigned max_hit_attrib_size)
|
||||
create_rt_variables(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
const VkPipelineCreateFlags2 flags, unsigned max_payload_size, unsigned max_hit_attrib_size)
|
||||
{
|
||||
const bool uses_descriptor_heap = flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
|
||||
struct rt_variables vars = {
|
||||
.device = device,
|
||||
.compiler_info = compiler_info,
|
||||
.flags = flags,
|
||||
};
|
||||
|
||||
|
|
@ -349,6 +347,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
|
|||
|
||||
struct rt_variables *vars = _vars;
|
||||
const bool uses_descriptor_heap = vars->flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
|
||||
const struct radv_compiler_info *compiler_info = vars->compiler_info;
|
||||
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
|
|
@ -452,7 +451,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_instance_custom_index: {
|
||||
ret = radv_load_custom_instance(vars->device, b, nir_load_param(b, vars->instance_addr_param));
|
||||
ret = radv_load_custom_instance(compiler_info, b, nir_load_param(b, vars->instance_addr_param));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_primitive_id: {
|
||||
|
|
@ -465,7 +464,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
|
|||
break;
|
||||
}
|
||||
case nir_intrinsic_load_instance_id: {
|
||||
ret = radv_load_instance_id(vars->device, b, nir_load_param(b, vars->instance_addr_param));
|
||||
ret = radv_load_instance_id(compiler_info, b, nir_load_param(b, vars->instance_addr_param));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_flags: {
|
||||
|
|
@ -483,7 +482,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
|
|||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *instance_node_addr = nir_load_param(b, vars->instance_addr_param);
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
|
||||
|
||||
nir_def *vals[3];
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
|
|
@ -495,20 +494,20 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
|
|||
case nir_intrinsic_load_ray_object_to_world: {
|
||||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *otw_matrix[3];
|
||||
radv_load_otw_matrix(vars->device, b, nir_load_param(b, vars->instance_addr_param), otw_matrix);
|
||||
radv_load_otw_matrix(compiler_info, b, nir_load_param(b, vars->instance_addr_param), otw_matrix);
|
||||
ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c),
|
||||
nir_channel(b, otw_matrix[2], c));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_object_origin: {
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(vars->device, b, nir_load_param(b, vars->instance_addr_param), wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, nir_load_param(b, vars->instance_addr_param), wto_matrix);
|
||||
ret = nir_build_vec3_mat_mult(b, nir_load_param(b, vars->ray_origin_param), wto_matrix, true);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_object_direction: {
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(vars->device, b, nir_load_param(b, vars->instance_addr_param), wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, nir_load_param(b, vars->instance_addr_param), wto_matrix);
|
||||
ret = nir_build_vec3_mat_mult(b, nir_load_param(b, vars->ray_direction_param), wto_matrix, false);
|
||||
break;
|
||||
}
|
||||
|
|
@ -713,7 +712,7 @@ lower_rt_instruction(nir_builder *b, nir_instr *instr, void *_vars)
|
|||
}
|
||||
case nir_intrinsic_load_ray_triangle_vertex_positions: {
|
||||
nir_def *primitive_addr = nir_load_param(b, vars->primitive_addr_param);
|
||||
ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr));
|
||||
ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
@ -852,7 +851,7 @@ radv_get_rt_shader_entrypoint(nir_shader *shader)
|
|||
|
||||
void
|
||||
radv_nir_lower_rt_abi_functions(nir_shader *shader, const struct radv_shader_info *info, uint32_t payload_size,
|
||||
uint32_t hit_attrib_size, struct radv_device *device,
|
||||
uint32_t hit_attrib_size, const struct radv_compiler_info *compiler_info,
|
||||
struct radv_ray_tracing_pipeline *pipeline)
|
||||
{
|
||||
const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
|
||||
|
|
@ -866,7 +865,7 @@ radv_nir_lower_rt_abi_functions(nir_shader *shader, const struct radv_shader_inf
|
|||
uses_descriptor_heap);
|
||||
|
||||
struct rt_variables vars =
|
||||
create_rt_variables(shader, device, pipeline->base.base.create_flags, payload_size, hit_attrib_size);
|
||||
create_rt_variables(shader, compiler_info, pipeline->base.base.create_flags, payload_size, hit_attrib_size);
|
||||
|
||||
nir_builder b = nir_builder_at(nir_before_impl(impl));
|
||||
unsigned num_hit_attribs = DIV_ROUND_UP(hit_attrib_size, 4);
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ void radv_nir_init_rt_function_params(nir_function *function, mesa_shader_stage
|
|||
unsigned hit_attrib_size, bool uses_descriptor_heap);
|
||||
|
||||
void radv_nir_lower_rt_abi_functions(nir_shader *shader, const struct radv_shader_info *info, uint32_t payload_size,
|
||||
uint32_t hit_attrib_size, struct radv_device *device,
|
||||
uint32_t hit_attrib_size, const struct radv_compiler_info *compiler_info,
|
||||
struct radv_ray_tracing_pipeline *pipeline);
|
||||
void radv_nir_lower_rt_io_functions(nir_shader *shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -11,12 +11,10 @@
|
|||
|
||||
#include "aco_nir_call_attribs.h"
|
||||
#include "nir_builder.h"
|
||||
#include "radv_device.h"
|
||||
#include "radv_nir_rt_stage_functions.h"
|
||||
#include "radv_physical_device.h"
|
||||
|
||||
struct chit_miss_inlining_params {
|
||||
struct radv_device *device;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
|
||||
struct radv_nir_rt_traversal_params *trav_params;
|
||||
struct radv_nir_rt_traversal_result *trav_result;
|
||||
|
|
@ -27,7 +25,7 @@ struct chit_miss_inlining_params {
|
|||
};
|
||||
|
||||
struct chit_miss_inlining_vars {
|
||||
struct radv_device *device;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
|
||||
nir_variable *shader_record_ptr;
|
||||
nir_variable *origin;
|
||||
|
|
@ -104,6 +102,7 @@ static bool
|
|||
lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void *_vars)
|
||||
{
|
||||
struct chit_miss_inlining_vars *vars = _vars;
|
||||
const struct radv_compiler_info *compiler_info = vars->compiler_info;
|
||||
|
||||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
|
|
@ -125,13 +124,13 @@ lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void *
|
|||
ret = nir_load_var(b, vars->tmin);
|
||||
break;
|
||||
case nir_intrinsic_load_ray_instance_custom_index:
|
||||
ret = radv_load_custom_instance(vars->device, b, nir_load_var(b, vars->instance_addr));
|
||||
ret = radv_load_custom_instance(compiler_info, b, nir_load_var(b, vars->instance_addr));
|
||||
break;
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
ret = nir_load_var(b, vars->primitive_id);
|
||||
break;
|
||||
case nir_intrinsic_load_instance_id:
|
||||
ret = radv_load_instance_id(vars->device, b, nir_load_var(b, vars->instance_addr));
|
||||
ret = radv_load_instance_id(compiler_info, b, nir_load_var(b, vars->instance_addr));
|
||||
break;
|
||||
case nir_intrinsic_load_ray_hit_kind:
|
||||
ret = nir_load_var(b, vars->hit_kind);
|
||||
|
|
@ -151,7 +150,7 @@ lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void *
|
|||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *instance_node_addr = nir_load_var(b, vars->instance_addr);
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(vars->device, b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
|
||||
|
||||
nir_def *vals[3];
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
|
|
@ -163,26 +162,26 @@ lower_rt_instruction_chit_miss(nir_builder *b, nir_intrinsic_instr *intr, void *
|
|||
case nir_intrinsic_load_ray_object_to_world: {
|
||||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *otw_matrix[3];
|
||||
radv_load_otw_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), otw_matrix);
|
||||
radv_load_otw_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), otw_matrix);
|
||||
ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c),
|
||||
nir_channel(b, otw_matrix[2], c));
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_object_origin: {
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->origin), wto_matrix, true);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_object_direction: {
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(vars->device, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, nir_load_var(b, vars->instance_addr), wto_matrix);
|
||||
ret = nir_build_vec3_mat_mult(b, nir_load_var(b, vars->direction), wto_matrix, false);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_load_ray_triangle_vertex_positions: {
|
||||
nir_def *primitive_addr = nir_load_var(b, vars->primitive_addr);
|
||||
ret = radv_load_vertex_position(vars->device, b, primitive_addr, nir_intrinsic_column(intr));
|
||||
ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
@ -238,7 +237,6 @@ radv_nir_lower_rt_io_monolithic(nir_shader *nir)
|
|||
}
|
||||
|
||||
struct rt_variables {
|
||||
struct radv_device *device;
|
||||
const VkPipelineCreateFlags2 flags;
|
||||
|
||||
uint32_t payload_offset;
|
||||
|
|
@ -252,13 +250,13 @@ radv_build_recursive_case(nir_builder *b, nir_def *idx, struct radv_ray_tracing_
|
|||
struct radv_rt_case_data *data)
|
||||
{
|
||||
nir_shader *shader =
|
||||
radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->recursive_shader].nir);
|
||||
radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->recursive_shader].nir);
|
||||
assert(shader);
|
||||
|
||||
struct chit_miss_inlining_params *params = data->param_data;
|
||||
|
||||
struct chit_miss_inlining_vars vars = {
|
||||
.device = params->device,
|
||||
.compiler_info = params->compiler_info,
|
||||
};
|
||||
|
||||
nir_opt_dead_cf(shader);
|
||||
|
|
@ -283,7 +281,7 @@ radv_build_recursive_case(nir_builder *b, nir_def *idx, struct radv_ray_tracing_
|
|||
}
|
||||
|
||||
struct lower_rt_instruction_monolithic_state {
|
||||
struct radv_device *device;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
struct radv_ray_tracing_pipeline *pipeline;
|
||||
const VkRayTracingPipelineCreateInfoKHR *pCreateInfo;
|
||||
|
||||
|
|
@ -296,8 +294,8 @@ lower_rt_call_monolithic(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
struct lower_rt_instruction_monolithic_state *state = data;
|
||||
const struct radv_physical_device *pdev = radv_device_physical(state->device);
|
||||
struct rt_variables *vars = state->vars;
|
||||
const struct radv_compiler_info *compiler_info = state->compiler_info;
|
||||
|
||||
switch (intr->intrinsic) {
|
||||
case nir_intrinsic_execute_callable:
|
||||
|
|
@ -333,19 +331,19 @@ lower_rt_call_monolithic(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
nir_store_var(b, vars->stack_ptr, nir_iadd_imm(b, stack_ptr, vars->stack_size), 0x1);
|
||||
|
||||
struct radv_nir_rt_traversal_result result =
|
||||
radv_build_traversal(state->device, state->pipeline, b, ¶ms, NULL);
|
||||
radv_build_traversal(compiler_info, state->pipeline, b, ¶ms, NULL);
|
||||
|
||||
nir_store_var(b, vars->stack_ptr, stack_ptr, 0x1);
|
||||
|
||||
struct chit_miss_inlining_params inline_params = {
|
||||
.device = state->device,
|
||||
.compiler_info = compiler_info,
|
||||
.trav_params = ¶ms,
|
||||
.trav_result = &result,
|
||||
.payload_offset = vars->payload_offset,
|
||||
};
|
||||
|
||||
struct radv_rt_case_data case_data = {
|
||||
.device = state->device,
|
||||
.compiler_info = compiler_info,
|
||||
.pipeline = state->pipeline,
|
||||
.param_data = &inline_params,
|
||||
};
|
||||
|
|
@ -383,7 +381,7 @@ lower_rt_call_monolithic(nir_builder *b, nir_intrinsic_instr *intr, void *data)
|
|||
nir_pop_if(b, NULL);
|
||||
|
||||
b->shader->info.shared_size =
|
||||
MAX2(b->shader->info.shared_size, pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
|
||||
MAX2(b->shader->info.shared_size, compiler_info->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t));
|
||||
|
||||
nir_instr_remove(&intr->instr);
|
||||
return true;
|
||||
|
|
@ -455,7 +453,7 @@ radv_count_ray_payload_size(nir_builder *b, nir_intrinsic_instr *instr, void *da
|
|||
}
|
||||
|
||||
void
|
||||
radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device,
|
||||
radv_nir_lower_rt_abi_monolithic(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
struct radv_ray_tracing_pipeline *pipeline)
|
||||
{
|
||||
const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
|
||||
|
|
@ -465,7 +463,6 @@ radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device,
|
|||
nir_builder b = nir_builder_at(nir_before_impl(impl));
|
||||
|
||||
struct rt_variables vars = {
|
||||
.device = device,
|
||||
.flags = pipeline->base.base.create_flags,
|
||||
.stack_size = b.shader->scratch_size,
|
||||
};
|
||||
|
|
@ -474,7 +471,7 @@ radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device,
|
|||
nir_store_var(&b, vars.stack_ptr, nir_imm_int(&b, 0), 0x1);
|
||||
|
||||
struct lower_rt_instruction_monolithic_state state = {
|
||||
.device = device,
|
||||
.compiler_info = compiler_info,
|
||||
.pipeline = pipeline,
|
||||
.vars = &vars,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
|
||||
#include "radv_pipeline_rt.h"
|
||||
|
||||
void radv_nir_lower_rt_abi_monolithic(nir_shader *shader, struct radv_device *device,
|
||||
void radv_nir_lower_rt_abi_monolithic(nir_shader *shader, const struct radv_compiler_info *compiler_info,
|
||||
struct radv_ray_tracing_pipeline *pipeline);
|
||||
void radv_nir_lower_rt_io_monolithic(nir_shader *shader);
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@
|
|||
|
||||
#include "aco_nir_call_attribs.h"
|
||||
#include "nir_builder.h"
|
||||
#include "radv_device.h"
|
||||
#include "radv_meta_nir.h"
|
||||
#include "radv_nir_rt_stage_functions.h"
|
||||
#include "radv_physical_device.h"
|
||||
|
|
@ -64,7 +63,7 @@ struct anyhit_shader_vars {
|
|||
|
||||
/* Parameters passed through to an inlined any-hit/intersection shader */
|
||||
struct traversal_inlining_params {
|
||||
struct radv_device *device;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
|
||||
radv_nir_ahit_isec_preprocess_cb preprocess;
|
||||
void *preprocess_data;
|
||||
|
|
@ -76,7 +75,7 @@ struct traversal_inlining_params {
|
|||
|
||||
/* Data about ray traversal passed through to AABB/Intersection callbacks */
|
||||
struct traversal_data {
|
||||
struct radv_device *device;
|
||||
const struct radv_compiler_info *compiler_info;
|
||||
struct radv_nir_rt_traversal_params *params;
|
||||
struct traversal_vars trav_vars;
|
||||
|
||||
|
|
@ -223,6 +222,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
|
|||
b->cursor = nir_after_instr(&intr->instr);
|
||||
|
||||
struct traversal_inlining_params *params = _params;
|
||||
const struct radv_compiler_info *compiler_info = params->compiler_info;
|
||||
|
||||
nir_def *ret = NULL;
|
||||
switch (intr->intrinsic) {
|
||||
|
|
@ -252,13 +252,13 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
|
|||
ret = nir_load_var(b, params->anyhit_vars->tmin);
|
||||
break;
|
||||
case nir_intrinsic_load_ray_instance_custom_index:
|
||||
ret = radv_load_custom_instance(params->device, b, nir_load_var(b, params->candidate->instance_addr));
|
||||
ret = radv_load_custom_instance(compiler_info, b, nir_load_var(b, params->candidate->instance_addr));
|
||||
break;
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
ret = nir_load_var(b, params->candidate->primitive_id);
|
||||
break;
|
||||
case nir_intrinsic_load_instance_id:
|
||||
ret = radv_load_instance_id(params->device, b, nir_load_var(b, params->candidate->instance_addr));
|
||||
ret = radv_load_instance_id(compiler_info, b, nir_load_var(b, params->candidate->instance_addr));
|
||||
break;
|
||||
case nir_intrinsic_load_ray_hit_kind:
|
||||
ret = nir_load_var(b, params->candidate->hit_kind);
|
||||
|
|
@ -278,7 +278,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
|
|||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *instance_node_addr = nir_load_var(b, params->candidate->instance_addr);
|
||||
nir_def *wto_matrix[3];
|
||||
radv_load_wto_matrix(params->device, b, instance_node_addr, wto_matrix);
|
||||
radv_load_wto_matrix(compiler_info, b, instance_node_addr, wto_matrix);
|
||||
|
||||
nir_def *vals[3];
|
||||
for (unsigned i = 0; i < 3; ++i)
|
||||
|
|
@ -290,7 +290,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
|
|||
case nir_intrinsic_load_ray_object_to_world: {
|
||||
unsigned c = nir_intrinsic_column(intr);
|
||||
nir_def *otw_matrix[3];
|
||||
radv_load_otw_matrix(params->device, b, nir_load_var(b, params->candidate->instance_addr), otw_matrix);
|
||||
radv_load_otw_matrix(compiler_info, b, nir_load_var(b, params->candidate->instance_addr), otw_matrix);
|
||||
ret = nir_vec3(b, nir_channel(b, otw_matrix[0], c), nir_channel(b, otw_matrix[1], c),
|
||||
nir_channel(b, otw_matrix[2], c));
|
||||
break;
|
||||
|
|
@ -346,7 +346,7 @@ lower_ahit_isec_intrinsics(nir_builder *b, nir_intrinsic_instr *intr, void *_par
|
|||
}
|
||||
case nir_intrinsic_load_ray_triangle_vertex_positions: {
|
||||
nir_def *primitive_addr = nir_load_var(b, params->candidate->primitive_addr);
|
||||
ret = radv_load_vertex_position(params->device, b, primitive_addr, nir_intrinsic_column(intr));
|
||||
ret = radv_load_vertex_position(compiler_info, b, primitive_addr, nir_intrinsic_column(intr));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
|
@ -371,7 +371,7 @@ insert_inlined_shader(nir_builder *b, struct traversal_inlining_params *params,
|
|||
* duplicates to the original variables passed through in 'params'.
|
||||
*/
|
||||
struct traversal_inlining_params src_params = {
|
||||
.device = params->device,
|
||||
.compiler_info = params->compiler_info,
|
||||
};
|
||||
|
||||
struct traversal_vars src_trav_vars;
|
||||
|
|
@ -647,7 +647,7 @@ radv_build_ahit_case(nir_builder *b, nir_def *sbt_idx, struct radv_ray_tracing_g
|
|||
struct traversal_inlining_params *params = data->param_data;
|
||||
|
||||
nir_shader *nir_stage =
|
||||
radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->any_hit_shader].nir);
|
||||
radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->any_hit_shader].nir);
|
||||
assert(nir_stage);
|
||||
|
||||
params->preprocess(nir_stage, params->preprocess_data);
|
||||
|
|
@ -673,7 +673,7 @@ radv_build_isec_case(nir_builder *b, nir_def *sbt_idx, struct radv_ray_tracing_g
|
|||
struct traversal_inlining_params *params = data->param_data;
|
||||
|
||||
nir_shader *nir_stage =
|
||||
radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->intersection_shader].nir);
|
||||
radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->intersection_shader].nir);
|
||||
assert(nir_stage);
|
||||
|
||||
params->preprocess(nir_stage, params->preprocess_data);
|
||||
|
|
@ -681,7 +681,7 @@ radv_build_isec_case(nir_builder *b, nir_def *sbt_idx, struct radv_ray_tracing_g
|
|||
nir_shader *any_hit_stage = NULL;
|
||||
if (group->any_hit_shader != VK_SHADER_UNUSED_KHR) {
|
||||
any_hit_stage =
|
||||
radv_pipeline_cache_handle_to_nir(data->device, data->pipeline->stages[group->any_hit_shader].nir);
|
||||
radv_pipeline_cache_handle_to_nir(data->compiler_info, data->pipeline->stages[group->any_hit_shader].nir);
|
||||
assert(any_hit_stage);
|
||||
|
||||
params->preprocess(any_hit_stage, params->preprocess_data);
|
||||
|
|
@ -698,7 +698,7 @@ static nir_def *
|
|||
radv_build_token_begin(nir_builder *b, struct traversal_data *data, nir_def *hit,
|
||||
enum radv_packed_token_type token_type, nir_def *token_size, uint32_t max_token_size)
|
||||
{
|
||||
struct radv_rra_trace_data *rra_trace = &data->device->rra_trace;
|
||||
struct radv_rra_trace_data *rra_trace = data->compiler_info->rra_trace;
|
||||
assert(rra_trace->ray_history_addr);
|
||||
assert(rra_trace->ray_history_buffer_size >= max_token_size);
|
||||
|
||||
|
|
@ -767,7 +767,7 @@ radv_build_end_trace_token(nir_builder *b, struct traversal_data *data, nir_def
|
|||
dst_addr = nir_iadd_imm(b, dst_addr, 8);
|
||||
|
||||
nir_def *dispatch_indices =
|
||||
ac_nir_load_smem(b, 2, nir_imm_int64(b, data->device->rra_trace.ray_history_addr),
|
||||
ac_nir_load_smem(b, 2, nir_imm_int64(b, data->compiler_info->rra_trace->ray_history_addr),
|
||||
nir_imm_int(b, offsetof(struct radv_ray_history_header, dispatch_index)), 4, 0);
|
||||
nir_def *dispatch_index = nir_iadd(b, nir_channel(b, dispatch_indices, 0), nir_channel(b, dispatch_indices, 1));
|
||||
nir_def *dispatch_and_flags = nir_iand_imm(b, data->params->cull_mask_and_flags, 0xFFFF);
|
||||
|
|
@ -883,7 +883,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
|
|||
nir_store_var(b, ahit_vars.shader_record_ptr, sbt_data.shader_record_ptr, 0x1);
|
||||
|
||||
struct traversal_inlining_params inlining_params = {
|
||||
.device = data->device,
|
||||
.compiler_info = data->compiler_info,
|
||||
.trav_vars = &data->trav_vars,
|
||||
.candidate = &candidate_result,
|
||||
.anyhit_vars = &ahit_vars,
|
||||
|
|
@ -892,7 +892,7 @@ handle_candidate_triangle(nir_builder *b, struct radv_triangle_intersection *int
|
|||
};
|
||||
|
||||
struct radv_rt_case_data case_data = {
|
||||
.device = data->device,
|
||||
.compiler_info = data->compiler_info,
|
||||
.pipeline = data->pipeline,
|
||||
.param_data = &inlining_params,
|
||||
};
|
||||
|
|
@ -1040,7 +1040,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
|
|||
nir_store_var(b, ahit_vars.shader_record_ptr, sbt_data.shader_record_ptr, 0x1);
|
||||
|
||||
struct traversal_inlining_params inlining_params = {
|
||||
.device = data->device,
|
||||
.compiler_info = data->compiler_info,
|
||||
.trav_vars = &data->trav_vars,
|
||||
.candidate = &candidate_result,
|
||||
.anyhit_vars = &ahit_vars,
|
||||
|
|
@ -1049,7 +1049,7 @@ handle_candidate_aabb(nir_builder *b, struct radv_leaf_intersection *intersectio
|
|||
};
|
||||
|
||||
struct radv_rt_case_data case_data = {
|
||||
.device = data->device,
|
||||
.compiler_info = data->compiler_info,
|
||||
.pipeline = data->pipeline,
|
||||
.param_data = &inlining_params,
|
||||
};
|
||||
|
|
@ -1148,17 +1148,17 @@ load_stack_entry(nir_builder *b, nir_def *index, const struct radv_ray_traversal
|
|||
}
|
||||
|
||||
struct radv_nir_rt_traversal_result
|
||||
radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline, nir_builder *b,
|
||||
struct radv_nir_rt_traversal_params *params, struct radv_ray_tracing_stage_info *info)
|
||||
radv_build_traversal(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline,
|
||||
nir_builder *b, struct radv_nir_rt_traversal_params *params,
|
||||
struct radv_ray_tracing_stage_info *info)
|
||||
{
|
||||
const bool uses_descriptor_heap = pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_DESCRIPTOR_HEAP_BIT_EXT;
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
nir_variable *barycentrics =
|
||||
nir_variable_create(b->shader, nir_var_ray_hit_attrib, glsl_vector_type(GLSL_TYPE_FLOAT, 2), "barycentrics");
|
||||
barycentrics->data.driver_location = 0;
|
||||
|
||||
struct traversal_data data = {
|
||||
.device = device,
|
||||
.compiler_info = compiler_info,
|
||||
.params = params,
|
||||
.pipeline = pipeline,
|
||||
};
|
||||
|
|
@ -1202,25 +1202,27 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
|||
}
|
||||
nir_push_else(b, NULL);
|
||||
{
|
||||
nir_store_var(b, data.trav_vars.current_node,
|
||||
nir_imm_int(b, radv_use_bvh_stack_rtn(pdev) ? RADV_BVH_STACK_TERMINAL_NODE : RADV_BVH_INVALID_NODE),
|
||||
0x1);
|
||||
nir_store_var(
|
||||
b, data.trav_vars.current_node,
|
||||
nir_imm_int(b, radv_use_bvh_stack_rtn(compiler_info) ? RADV_BVH_STACK_TERMINAL_NODE : RADV_BVH_INVALID_NODE),
|
||||
0x1);
|
||||
}
|
||||
nir_pop_if(b, NULL);
|
||||
bvh_offset = nir_if_phi(b, bvh_offset, zero);
|
||||
|
||||
nir_def *root_bvh_base = nir_iadd(b, params->accel_struct, nir_u2u64(b, bvh_offset));
|
||||
root_bvh_base = build_addr_to_node(device, b, root_bvh_base, params->cull_mask_and_flags);
|
||||
root_bvh_base = build_addr_to_node(compiler_info, b, root_bvh_base, params->cull_mask_and_flags);
|
||||
|
||||
nir_def *stack_idx = nir_load_subgroup_invocation(b);
|
||||
uint32_t stack_stride;
|
||||
|
||||
if (radv_use_bvh_stack_rtn(pdev)) {
|
||||
stack_idx = radv_build_bvh_stack_rtn_addr(b, stack_idx, pdev, pdev->rt_wave_size, 0, MAX_STACK_ENTRY_COUNT);
|
||||
if (radv_use_bvh_stack_rtn(compiler_info)) {
|
||||
stack_idx = radv_build_bvh_stack_rtn_addr(b, stack_idx, compiler_info, compiler_info->rt_wave_size, 0,
|
||||
MAX_STACK_ENTRY_COUNT);
|
||||
stack_stride = 1;
|
||||
} else {
|
||||
stack_idx = nir_imul_imm(b, stack_idx, sizeof(uint32_t));
|
||||
stack_stride = pdev->rt_wave_size * sizeof(uint32_t);
|
||||
stack_stride = compiler_info->rt_wave_size * sizeof(uint32_t);
|
||||
}
|
||||
|
||||
nir_store_var(b, data.trav_vars.result.hit, nir_imm_false(b), 1);
|
||||
|
|
@ -1245,7 +1247,7 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
|||
nir_store_var(b, data.trav_vars.top_stack, nir_imm_int(b, -1), 1);
|
||||
|
||||
nir_variable *iteration_instance_count = NULL;
|
||||
if (device->rra_trace.ray_history_addr) {
|
||||
if (compiler_info->rra_trace->ray_history_addr) {
|
||||
data.trav_vars.ahit_isec_count =
|
||||
nir_variable_create(b->shader, nir_var_shader_temp, glsl_uint_type(), "ahit_isec_count");
|
||||
iteration_instance_count =
|
||||
|
|
@ -1277,16 +1279,16 @@ radv_build_traversal(struct radv_device *device, struct radv_ray_tracing_pipelin
|
|||
.triangle_cb = (pipeline->base.base.create_flags & VK_PIPELINE_CREATE_2_RAY_TRACING_SKIP_TRIANGLES_BIT_KHR)
|
||||
? NULL
|
||||
: handle_candidate_triangle,
|
||||
.use_bvh_stack_rtn = radv_use_bvh_stack_rtn(pdev),
|
||||
.use_bvh_stack_rtn = radv_use_bvh_stack_rtn(compiler_info),
|
||||
.data = &data,
|
||||
};
|
||||
|
||||
if (pdev->cache_key.bvh8)
|
||||
radv_build_ray_traversal_gfx12(device, b, &args);
|
||||
if (compiler_info->cache_key->bvh8)
|
||||
radv_build_ray_traversal_gfx12(compiler_info, b, &args);
|
||||
else
|
||||
radv_build_ray_traversal(device, b, &args);
|
||||
radv_build_ray_traversal(compiler_info, b, &args);
|
||||
|
||||
if (device->rra_trace.ray_history_addr)
|
||||
if (compiler_info->rra_trace->ray_history_addr)
|
||||
radv_build_end_trace_token(b, &data, nir_load_var(b, iteration_instance_count));
|
||||
|
||||
nir_progress(true, b->impl, nir_metadata_none);
|
||||
|
|
@ -1304,21 +1306,19 @@ preprocess_traversal_shader_ahit_isec(nir_shader *nir, void *cb)
|
|||
}
|
||||
|
||||
nir_shader *
|
||||
radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
|
||||
radv_build_traversal_shader(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline,
|
||||
struct radv_ray_tracing_stage_info *info, radv_nir_traversal_preprocess_cb preprocess,
|
||||
uint32_t payload_size, uint32_t hit_attrib_size)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
/* Create the traversal shader as an intersection shader to prevent validation failures due to
|
||||
* invalid variable modes.*/
|
||||
nir_builder b = radv_meta_nir_init_shader(MESA_SHADER_INTERSECTION, "rt_traversal");
|
||||
b.shader->options = &pdev->nir_options[MESA_SHADER_INTERSECTION];
|
||||
b.shader->info.workgroup_size[0] = pdev->rt_wave_size;
|
||||
b.shader->info.api_subgroup_size = pdev->rt_wave_size;
|
||||
b.shader->info.max_subgroup_size = pdev->rt_wave_size;
|
||||
b.shader->info.min_subgroup_size = pdev->rt_wave_size;
|
||||
b.shader->info.shared_size = pdev->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
|
||||
b.shader->options = &compiler_info->nir_options[MESA_SHADER_INTERSECTION];
|
||||
b.shader->info.workgroup_size[0] = compiler_info->rt_wave_size;
|
||||
b.shader->info.api_subgroup_size = compiler_info->rt_wave_size;
|
||||
b.shader->info.max_subgroup_size = compiler_info->rt_wave_size;
|
||||
b.shader->info.min_subgroup_size = compiler_info->rt_wave_size;
|
||||
b.shader->info.shared_size = compiler_info->rt_wave_size * MAX_STACK_ENTRY_COUNT * sizeof(uint32_t);
|
||||
|
||||
struct radv_nir_rt_traversal_params params = {0};
|
||||
|
||||
|
|
@ -1354,7 +1354,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
|
|||
params.hit_attrib_size = hit_attrib_size;
|
||||
params.ignore_cull_mask = false;
|
||||
|
||||
struct radv_nir_rt_traversal_result result = radv_build_traversal(device, pipeline, &b, ¶ms, info);
|
||||
struct radv_nir_rt_traversal_result result = radv_build_traversal(compiler_info, pipeline, &b, ¶ms, info);
|
||||
|
||||
b.cursor = nir_before_impl(nir_shader_get_entrypoint(b.shader));
|
||||
|
||||
|
|
@ -1366,7 +1366,7 @@ radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_
|
|||
}
|
||||
b.cursor = nir_after_impl(nir_shader_get_entrypoint(b.shader));
|
||||
|
||||
radv_nir_lower_rt_storage(b.shader, hit_attrib_derefs, NULL, NULL, pdev->rt_wave_size);
|
||||
radv_nir_lower_rt_storage(b.shader, hit_attrib_derefs, NULL, NULL, compiler_info->rt_wave_size);
|
||||
|
||||
nir_push_if(&b, nir_load_var(&b, result.hit));
|
||||
{
|
||||
|
|
|
|||
|
|
@ -13,7 +13,8 @@ typedef void (*radv_nir_traversal_preprocess_cb)(nir_shader *nir);
|
|||
|
||||
void radv_nir_lower_intersection_shader(nir_shader *intersection, nir_shader *any_hit);
|
||||
|
||||
nir_shader *radv_build_traversal_shader(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
|
||||
nir_shader *radv_build_traversal_shader(const struct radv_compiler_info *compiler_info,
|
||||
struct radv_ray_tracing_pipeline *pipeline,
|
||||
struct radv_ray_tracing_stage_info *info,
|
||||
radv_nir_traversal_preprocess_cb preprocess, uint32_t payload_size,
|
||||
uint32_t hit_attrib_size);
|
||||
|
|
|
|||
|
|
@ -41,13 +41,12 @@ radv_calculate_lds_size(const struct radv_shader_info *radv, const enum amd_gfx_
|
|||
|
||||
static inline void
|
||||
radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv_shader_info *radv,
|
||||
const struct radv_shader_args *radv_args, const struct radv_device_cache_key *radv_key,
|
||||
const enum amd_gfx_level gfx_level)
|
||||
const struct radv_shader_args *radv_args, const struct radv_compiler_info *compiler_info)
|
||||
{
|
||||
bool ngg_wave_id_en = radv->ngg_wave_id_en;
|
||||
/* Separately compiled shader, where the next stage might use NGG streamout. */
|
||||
ngg_wave_id_en |= radv->is_ngg && radv->merged_shader_compiled_separately &&
|
||||
radv->next_stage == MESA_SHADER_GEOMETRY && gfx_level >= GFX11;
|
||||
radv->next_stage == MESA_SHADER_GEOMETRY && compiler_info->ac->gfx_level >= GFX11;
|
||||
|
||||
ASSIGN_FIELD(wave_size);
|
||||
ASSIGN_FIELD(workgroup_size);
|
||||
|
|
@ -60,16 +59,17 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv
|
|||
ASSIGN_FIELD(descriptor_heap);
|
||||
aco_info->vs.any_tcs_inputs_via_lds = radv->vs.tcs_inputs_via_lds != 0;
|
||||
/* S2 must not be modified for correct hang recovery when NGG_WAVE_ID_EN=1. */
|
||||
aco_info->vs.preserve_s2 = ngg_wave_id_en && gfx_level < GFX12;
|
||||
aco_info->vs.preserve_s2 = ngg_wave_id_en && compiler_info->ac->gfx_level < GFX12;
|
||||
aco_info->ps.spi_ps_input_ena = radv->ps.spi_ps_input_ena;
|
||||
aco_info->ps.spi_ps_input_addr = radv->ps.spi_ps_input_addr;
|
||||
aco_info->ps.has_prolog = false;
|
||||
aco_info->image_2d_view_of_3d = radv_key->image_2d_view_of_3d;
|
||||
aco_info->image_2d_view_of_3d = compiler_info->image_2d_view_of_3d;
|
||||
aco_info->epilog_pc = radv_args->epilog_pc;
|
||||
aco_info->hw_stage = radv_select_hw_stage(radv, gfx_level);
|
||||
aco_info->hw_stage = radv_select_hw_stage(radv, compiler_info->ac->gfx_level);
|
||||
aco_info->next_stage_pc = radv_args->next_stage_pc;
|
||||
aco_info->schedule_ngg_pos_exports = gfx_level < GFX11 && radv->has_ngg_culling && radv->has_ngg_early_prim_export;
|
||||
aco_info->lds_size = radv_calculate_lds_size(radv, gfx_level);
|
||||
aco_info->schedule_ngg_pos_exports =
|
||||
compiler_info->ac->gfx_level < GFX11 && radv->has_ngg_culling && radv->has_ngg_early_prim_export;
|
||||
aco_info->lds_size = radv_calculate_lds_size(radv, compiler_info->ac->gfx_level);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
|
|
|||
|
|
@ -6330,7 +6330,7 @@ lookup_ps_epilog(struct radv_cmd_buffer *cmd_buffer)
|
|||
}
|
||||
}
|
||||
|
||||
struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(device, &state);
|
||||
struct radv_ps_epilog_key key = radv_generate_ps_epilog_key(&device->compiler_info, &state);
|
||||
|
||||
/* Adjust the remapping for alpha-to-coverage without any color attachment and dual-source
|
||||
* blending to make sure colors written aren't cleared.
|
||||
|
|
@ -12829,7 +12829,7 @@ radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct r
|
|||
const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
|
||||
uint32_t tess_num_patches, tess_lds_size;
|
||||
|
||||
radv_get_tess_wg_info(pdev, &tcs->info.tcs.io_info, tcs->info.tcs.tcs_vertices_out,
|
||||
radv_get_tess_wg_info(&device->compiler_info, &tcs->info.tcs.io_info, tcs->info.tcs.tcs_vertices_out,
|
||||
d->vk.ts.patch_control_points,
|
||||
/* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */
|
||||
vs->info.vs.num_linked_outputs, &tess_num_patches, &tess_lds_size);
|
||||
|
|
@ -13104,10 +13104,10 @@ radv_bind_graphics_shaders(struct radv_cmd_buffer *cmd_buffer)
|
|||
|
||||
if (cmd_buffer->state.shaders[MESA_SHADER_GEOMETRY]->info.is_ngg) {
|
||||
gfx10_ngg_set_esgs_ring_itemsize(&es->info, &gs->info, &gs->info.ngg_info);
|
||||
gfx10_get_ngg_info(device, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
gfx10_get_ngg_info(&device->compiler_info, &es->info, &gs->info, &gs->info.ngg_info);
|
||||
radv_precompute_registers_hw_ngg(device, gs);
|
||||
} else {
|
||||
radv_get_legacy_gs_info(device, &es->info, &gs->info);
|
||||
radv_get_legacy_gs_info(&device->compiler_info, &es->info, &gs->info);
|
||||
radv_precompute_registers_hw_gs(device, &es->info, gs);
|
||||
|
||||
cmd_buffer->esgs_ring_size_needed = MAX2(cmd_buffer->esgs_ring_size_needed, gs->regs.gs.esgs_ring_size);
|
||||
|
|
|
|||
|
|
@ -289,7 +289,7 @@ radv_physical_device_init_cache_key(struct radv_physical_device *pdev)
|
|||
|
||||
key->clear_lds = instance->drirc.misc.clear_lds;
|
||||
key->cs_wave32 = pdev->cs_wave_size == 32;
|
||||
key->disable_aniso_single_level = instance->drirc.debug.disable_aniso_single_level && pdev->info.gfx_level < GFX8;
|
||||
key->disable_aniso_single_level = instance->drirc.debug.disable_aniso_single_level;
|
||||
key->disable_shrink_image_store = instance->drirc.debug.disable_shrink_image_store;
|
||||
key->disable_sinking_load_input_fs = instance->drirc.debug.disable_sinking_load_input_fs;
|
||||
key->disable_trunc_coord = instance->drirc.debug.disable_trunc_coord;
|
||||
|
|
|
|||
|
|
@ -36,25 +36,16 @@
|
|||
#include "vk_format.h"
|
||||
|
||||
bool
|
||||
radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2 flags)
|
||||
radv_pipeline_capture_shaders(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
return (flags & VK_PIPELINE_CREATE_2_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) ||
|
||||
(instance->debug_flags & RADV_DEBUG_DUMP_SHADERS) || device->keep_shader_info;
|
||||
compiler_info->debug.capture_shaders;
|
||||
}
|
||||
|
||||
bool
|
||||
radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2 flags)
|
||||
radv_pipeline_capture_shader_stats(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
/* Capture shader statistics when RGP is enabled to correlate shader hashes with Fossilize. */
|
||||
return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) ||
|
||||
(instance->debug_flags & (RADV_DEBUG_DUMP_SHADER_STATS | RADV_DEBUG_PSO_HISTORY)) ||
|
||||
device->keep_shader_info || (instance->vk.trace_mode & RADV_TRACE_MODE_RGP);
|
||||
return (flags & VK_PIPELINE_CREATE_2_CAPTURE_STATISTICS_BIT_KHR) || compiler_info->debug.capture_shader_stats;
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
@ -127,14 +118,13 @@ struct radv_shader_stage_key
|
|||
radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineShaderStageCreateInfo *stage,
|
||||
VkPipelineCreateFlags2 flags, const void *pNext)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
const struct radv_compiler_info *compiler_info = &device->compiler_info;
|
||||
mesa_shader_stage s = vk_to_mesa_shader_stage(stage->stage);
|
||||
struct vk_pipeline_robustness_state rs;
|
||||
struct radv_shader_stage_key key = {0};
|
||||
|
||||
key.keep_statistic_info = radv_pipeline_capture_shader_stats(device, flags);
|
||||
key.keep_executable_info = radv_pipeline_capture_shaders(device, flags);
|
||||
key.keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, flags);
|
||||
key.keep_executable_info = radv_pipeline_capture_shaders(compiler_info, flags);
|
||||
|
||||
if (flags & VK_PIPELINE_CREATE_2_DISABLE_OPTIMIZATION_BIT)
|
||||
key.optimisations_disabled = 1;
|
||||
|
|
@ -149,12 +139,12 @@ radv_pipeline_get_shader_key(const struct radv_device *device, const VkPipelineS
|
|||
key.descriptor_heap = 1;
|
||||
|
||||
if (stage->stage & RADV_GRAPHICS_STAGE_BITS) {
|
||||
key.version = instance->drirc.misc.override_graphics_shader_version;
|
||||
key.version = compiler_info->override_graphics_shader_version;
|
||||
} else if (stage->stage & RADV_RT_STAGE_BITS) {
|
||||
key.version = instance->drirc.misc.override_ray_tracing_shader_version;
|
||||
key.version = compiler_info->override_ray_tracing_shader_version;
|
||||
} else {
|
||||
assert(stage->stage == VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
key.version = instance->drirc.misc.override_compute_shader_version;
|
||||
key.version = compiler_info->override_compute_shader_version;
|
||||
}
|
||||
|
||||
vk_pipeline_robustness_state_fill(&device->vk.robustness_state, &rs, pNext, stage->pNext);
|
||||
|
|
@ -262,12 +252,11 @@ non_uniform_access_callback(const nir_src *src, void *_)
|
|||
}
|
||||
|
||||
void
|
||||
radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
|
||||
radv_postprocess_nir(const struct radv_compiler_info *compiler_info, const struct radv_graphics_state_key *gfx_state,
|
||||
struct radv_shader_stage *stage)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
const bool use_llvm = pdev->use_llvm;
|
||||
enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
|
||||
const bool use_llvm = compiler_info->debug.use_llvm;
|
||||
bool progress;
|
||||
|
||||
/* Wave and workgroup size should already be filled. */
|
||||
|
|
@ -318,7 +307,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
NIR_PASS(progress, stage->nir, nir_opt_load_store_vectorize, &vectorize_opts);
|
||||
if (progress) {
|
||||
NIR_PASS(_, stage->nir, nir_opt_copy_prop);
|
||||
NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !pdev->cache_key.disable_shrink_image_store);
|
||||
NIR_PASS(_, stage->nir, nir_opt_shrink_stores, !compiler_info->cache_key->disable_shrink_image_store);
|
||||
|
||||
constant_fold_for_push_const = true;
|
||||
}
|
||||
|
|
@ -357,7 +346,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
&(ac_nir_lower_image_tex_options){
|
||||
.gfx_level = gfx_level,
|
||||
.lower_array_layer_round_even =
|
||||
!pdev->info.compiler_info.conformant_trunc_coord && !pdev->cache_key.disable_trunc_coord,
|
||||
!compiler_info->ac->conformant_trunc_coord && !compiler_info->cache_key->disable_trunc_coord,
|
||||
.fix_derivs_in_divergent_cf = stage->stage == MESA_SHADER_FRAGMENT && !use_llvm,
|
||||
.max_wqm_vgprs = 64, // TODO: improve spiller and RA support for linear VGPRs
|
||||
});
|
||||
|
|
@ -378,7 +367,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
/* This has to be done after nir_opt_algebraic for best descriptor vectorization, but also before
|
||||
* NGG culling.
|
||||
*/
|
||||
NIR_PASS(_, stage->nir, radv_nir_lower_descriptors, device, stage);
|
||||
NIR_PASS(_, stage->nir, radv_nir_lower_descriptors, compiler_info, stage);
|
||||
|
||||
NIR_PASS(_, stage->nir, nir_lower_alu_width, ac_nir_opt_vectorize_cb, &gfx_level);
|
||||
|
||||
|
|
@ -395,7 +384,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
|
||||
NIR_PASS(_, stage->nir, nir_opt_move, sink_opts);
|
||||
} else {
|
||||
if (stage->stage != MESA_SHADER_FRAGMENT || !pdev->cache_key.disable_sinking_load_input_fs)
|
||||
if (stage->stage != MESA_SHADER_FRAGMENT || !compiler_info->cache_key->disable_sinking_load_input_fs)
|
||||
sink_opts |= nir_move_load_input | nir_move_load_frag_coord;
|
||||
|
||||
NIR_PASS(_, stage->nir, nir_opt_sink, sink_opts);
|
||||
|
|
@ -407,13 +396,13 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
* load_input can be reordered, but buffer loads can't.
|
||||
*/
|
||||
if (stage->stage == MESA_SHADER_VERTEX) {
|
||||
NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, stage, gfx_state, &pdev->info);
|
||||
NIR_PASS(_, stage->nir, radv_nir_lower_vs_inputs, compiler_info, stage, gfx_state);
|
||||
}
|
||||
|
||||
/* Lower I/O intrinsics to memory instructions. */
|
||||
bool io_to_mem = radv_nir_lower_io_to_mem(device, stage);
|
||||
bool io_to_mem = radv_nir_lower_io_to_mem(compiler_info, stage);
|
||||
if (lowered_ngg) {
|
||||
radv_lower_ngg(device, stage, gfx_state);
|
||||
radv_lower_ngg(compiler_info, stage, gfx_state);
|
||||
} else if (is_last_vgt_stage) {
|
||||
if (stage->stage != MESA_SHADER_GEOMETRY) {
|
||||
NIR_PASS(_, stage->nir, ac_nir_lower_legacy_vs, gfx_level,
|
||||
|
|
@ -477,7 +466,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
NIR_PASS(_, stage->nir, ac_nir_lower_ps_late, &late_options);
|
||||
}
|
||||
|
||||
if (radv_shader_should_clear_lds(device, stage->nir)) {
|
||||
if (radv_shader_should_clear_lds(compiler_info, stage->nir)) {
|
||||
const unsigned chunk_size = 16; /* max single store size */
|
||||
const unsigned shared_size = align(stage->nir->info.shared_size, chunk_size);
|
||||
NIR_PASS(_, stage->nir, nir_clear_shared_memory, shared_size, chunk_size);
|
||||
|
|
@ -504,18 +493,18 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
.allow_fp16 = gfx_level >= GFX9,
|
||||
});
|
||||
|
||||
NIR_PASS(_, stage->nir, ac_nir_lower_intrinsics_to_args, &stage->args.ac,
|
||||
&(ac_nir_lower_intrinsics_to_args_options){
|
||||
.gfx_level = gfx_level,
|
||||
.has_ls_vgpr_init_bug =
|
||||
pdev->info.compiler_info.has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog,
|
||||
.hw_stage = radv_select_hw_stage(&stage->info, gfx_level),
|
||||
.wave_size = stage->info.wave_size,
|
||||
.workgroup_size = stage->info.workgroup_size,
|
||||
.use_llvm = use_llvm,
|
||||
.load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr,
|
||||
});
|
||||
NIR_PASS(_, stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi);
|
||||
NIR_PASS(
|
||||
_, stage->nir, ac_nir_lower_intrinsics_to_args, &stage->args.ac,
|
||||
&(ac_nir_lower_intrinsics_to_args_options){
|
||||
.gfx_level = gfx_level,
|
||||
.has_ls_vgpr_init_bug = compiler_info->ac->has_ls_vgpr_init_bug && gfx_state && !gfx_state->vs.has_prolog,
|
||||
.hw_stage = radv_select_hw_stage(&stage->info, gfx_level),
|
||||
.wave_size = stage->info.wave_size,
|
||||
.workgroup_size = stage->info.workgroup_size,
|
||||
.use_llvm = use_llvm,
|
||||
.load_grid_size_from_user_sgpr = compiler_info->load_grid_size_from_user_sgpr,
|
||||
});
|
||||
NIR_PASS(_, stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, compiler_info->hw.address32_hi);
|
||||
|
||||
if (!stage->key.optimisations_disabled) {
|
||||
NIR_PASS(_, stage->nir, nir_opt_dce);
|
||||
|
|
@ -545,7 +534,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
NIR_PASS(_, stage->nir, ac_nir_lower_global_access);
|
||||
NIR_PASS(_, stage->nir, nir_lower_int64);
|
||||
|
||||
if (pdev->cache_key.mitigate_smem_oob)
|
||||
if (compiler_info->cache_key->mitigate_smem_oob)
|
||||
NIR_PASS(_, stage->nir, ac_nir_fixup_mem_access_gfx6, &stage->args.ac, 4096, true, true);
|
||||
|
||||
bool opt_intrinsics = false;
|
||||
|
|
@ -639,13 +628,11 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
}
|
||||
|
||||
bool
|
||||
radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader)
|
||||
radv_shader_should_clear_lds(const struct radv_compiler_info *compiler_info, const nir_shader *shader)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
return (shader->info.stage == MESA_SHADER_COMPUTE || shader->info.stage == MESA_SHADER_MESH ||
|
||||
shader->info.stage == MESA_SHADER_TASK) &&
|
||||
shader->info.shared_size > 0 && pdev->cache_key.clear_lds;
|
||||
shader->info.shared_size > 0 && compiler_info->cache_key->clear_lds;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ struct radv_graphics_state_key;
|
|||
struct radv_shader_layout;
|
||||
struct nir_shader;
|
||||
typedef struct nir_shader nir_shader;
|
||||
struct radv_compiler_info;
|
||||
|
||||
enum radv_pipeline_type {
|
||||
RADV_PIPELINE_GRAPHICS,
|
||||
|
|
@ -72,9 +73,9 @@ VK_DEFINE_NONDISP_HANDLE_CASTS(radv_pipeline, base, VkPipeline, VK_OBJECT_TYPE_P
|
|||
return (struct radv_##pipe_type##_pipeline *)pipeline; \
|
||||
}
|
||||
|
||||
bool radv_pipeline_capture_shaders(const struct radv_device *device, VkPipelineCreateFlags2 flags);
|
||||
bool radv_pipeline_capture_shaders(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags);
|
||||
|
||||
bool radv_pipeline_capture_shader_stats(const struct radv_device *device, VkPipelineCreateFlags2 flags);
|
||||
bool radv_pipeline_capture_shader_stats(const struct radv_compiler_info *compiler_info, VkPipelineCreateFlags2 flags);
|
||||
|
||||
bool radv_pipeline_skip_shaders_cache(const struct radv_device *device, const struct radv_pipeline *pipeline);
|
||||
|
||||
|
|
@ -96,10 +97,10 @@ void radv_shader_layout_init(const struct radv_pipeline_layout *pipeline_layout,
|
|||
|
||||
void radv_pipeline_stage_finish(struct radv_shader_stage *stage);
|
||||
|
||||
void radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
|
||||
struct radv_shader_stage *stage);
|
||||
void radv_postprocess_nir(const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stage);
|
||||
|
||||
bool radv_shader_should_clear_lds(const struct radv_device *device, const nir_shader *shader);
|
||||
bool radv_shader_should_clear_lds(const struct radv_compiler_info *compiler_info, const nir_shader *shader);
|
||||
|
||||
VkPipelineShaderStageCreateInfo *radv_copy_shader_stage_create_info(struct radv_device *device, uint32_t stageCount,
|
||||
const VkPipelineShaderStageCreateInfo *pStages,
|
||||
|
|
|
|||
|
|
@ -130,30 +130,16 @@ radv_shader_cache_serialize(struct vk_pipeline_cache_object *object, struct blob
|
|||
}
|
||||
|
||||
static bool
|
||||
radv_is_cache_disabled(const struct radv_device *device, const struct vk_pipeline_cache *cache)
|
||||
radv_is_cache_disabled(const struct radv_compiler_info *compiler_info, const struct vk_pipeline_cache *cache)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
/* The buffer address used for debug printf is hardcoded. */
|
||||
if (device->debug_nir.printf.buffer_addr)
|
||||
return true;
|
||||
|
||||
/* The buffer address used for validating VAs is hardcoded. */
|
||||
if (device->debug_nir.valid_va.buffer_addr)
|
||||
return true;
|
||||
|
||||
/* Pipeline caches can be disabled with RADV_DEBUG=nocache, with MESA_GLSL_CACHE_DISABLE=1 and
|
||||
* when ACO_DEBUG is used. MESA_GLSL_CACHE_DISABLE is done elsewhere.
|
||||
*/
|
||||
if ((instance->debug_flags & RADV_DEBUG_NO_CACHE) || (pdev->use_llvm ? 0 : aco_get_codegen_flags()))
|
||||
if (compiler_info->cache_disabled)
|
||||
return true;
|
||||
|
||||
if (!cache) {
|
||||
/* When the application doesn't provide a pipeline cache and the in-memory cache is also
|
||||
* disabled.
|
||||
*/
|
||||
cache = device->mem_cache;
|
||||
cache = compiler_info->mem_cache;
|
||||
if (!cache)
|
||||
return true;
|
||||
}
|
||||
|
|
@ -165,7 +151,7 @@ struct radv_shader *
|
|||
radv_shader_create(struct radv_device *device, struct vk_pipeline_cache *cache, const struct radv_shader_binary *binary,
|
||||
bool skip_cache, struct radv_shader_debug_info *dbg)
|
||||
{
|
||||
if (radv_is_cache_disabled(device, cache) || skip_cache || (dbg && dbg->dump_shader)) {
|
||||
if (radv_is_cache_disabled(&device->compiler_info, cache) || skip_cache || (dbg && dbg->dump_shader)) {
|
||||
struct radv_shader *shader;
|
||||
radv_shader_create_uncached(device, binary, false, NULL, dbg, &shader);
|
||||
return shader;
|
||||
|
|
@ -339,7 +325,7 @@ radv_pipeline_cache_object_search(struct radv_device *device, struct vk_pipeline
|
|||
{
|
||||
*found_in_application_cache = false;
|
||||
|
||||
if (radv_is_cache_disabled(device, cache))
|
||||
if (radv_is_cache_disabled(&device->compiler_info, cache))
|
||||
return NULL;
|
||||
|
||||
bool *found = found_in_application_cache;
|
||||
|
|
@ -404,7 +390,7 @@ radv_compute_pipeline_cache_search(struct radv_device *device, struct vk_pipelin
|
|||
void
|
||||
radv_pipeline_cache_insert(struct radv_device *device, struct vk_pipeline_cache *cache, struct radv_pipeline *pipeline)
|
||||
{
|
||||
if (radv_is_cache_disabled(device, cache))
|
||||
if (radv_is_cache_disabled(&device->compiler_info, cache))
|
||||
return;
|
||||
|
||||
if (!cache)
|
||||
|
|
@ -490,7 +476,7 @@ radv_ray_tracing_pipeline_cache_search(struct radv_device *device, struct vk_pip
|
|||
pipeline->stages[i].shader = radv_shader_ref(pipeline_obj->shaders[idx++]);
|
||||
|
||||
if (pipeline->stages[i].needs_nir) {
|
||||
pipeline->stages[i].nir = radv_pipeline_cache_lookup_nir_handle(device, cache, pipeline->stages[i].blake3);
|
||||
pipeline->stages[i].nir = radv_pipeline_cache_lookup_nir_handle(&device->compiler_info, cache, pipeline->stages[i].blake3);
|
||||
complete &= pipeline->stages[i].nir != NULL;
|
||||
}
|
||||
}
|
||||
|
|
@ -510,7 +496,7 @@ radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pip
|
|||
struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages,
|
||||
unsigned num_groups)
|
||||
{
|
||||
if (radv_is_cache_disabled(device, cache))
|
||||
if (radv_is_cache_disabled(&device->compiler_info, cache))
|
||||
return;
|
||||
|
||||
if (!cache)
|
||||
|
|
@ -574,49 +560,48 @@ radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct vk_pip
|
|||
}
|
||||
|
||||
nir_shader *
|
||||
radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache, mesa_shader_stage stage,
|
||||
const blake3_hash key)
|
||||
radv_pipeline_cache_lookup_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
|
||||
mesa_shader_stage stage, const blake3_hash key)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (radv_is_cache_disabled(device, cache))
|
||||
if (radv_is_cache_disabled(compiler_info, cache))
|
||||
return NULL;
|
||||
|
||||
if (!cache)
|
||||
cache = device->mem_cache;
|
||||
cache = compiler_info->mem_cache;
|
||||
|
||||
return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &pdev->nir_options[stage], NULL, NULL);
|
||||
return vk_pipeline_cache_lookup_nir(cache, key, sizeof(blake3_hash), &compiler_info->nir_options[stage], NULL, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key,
|
||||
const nir_shader *nir)
|
||||
radv_pipeline_cache_insert_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
|
||||
const blake3_hash key, const nir_shader *nir)
|
||||
{
|
||||
if (radv_is_cache_disabled(device, cache))
|
||||
if (radv_is_cache_disabled(compiler_info, cache))
|
||||
return;
|
||||
|
||||
if (!cache)
|
||||
cache = device->mem_cache;
|
||||
cache = compiler_info->mem_cache;
|
||||
|
||||
vk_pipeline_cache_add_nir(cache, key, sizeof(blake3_hash), nir);
|
||||
}
|
||||
|
||||
struct vk_pipeline_cache_object *
|
||||
radv_pipeline_cache_lookup_nir_handle(struct radv_device *device, struct vk_pipeline_cache *cache, const uint8_t *blake3)
|
||||
radv_pipeline_cache_lookup_nir_handle(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
|
||||
const uint8_t *blake3)
|
||||
{
|
||||
if (radv_is_cache_disabled(device, cache))
|
||||
if (radv_is_cache_disabled(compiler_info, cache))
|
||||
return NULL;
|
||||
|
||||
if (!cache)
|
||||
cache = device->mem_cache;
|
||||
cache = compiler_info->mem_cache;
|
||||
|
||||
return vk_pipeline_cache_lookup_object(cache, blake3, BLAKE3_KEY_LEN, &vk_raw_data_cache_object_ops, NULL);
|
||||
}
|
||||
|
||||
struct nir_shader *
|
||||
radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline_cache_object *object)
|
||||
radv_pipeline_cache_handle_to_nir(const struct radv_compiler_info *compiler_info,
|
||||
struct vk_pipeline_cache_object *object)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct blob_reader blob;
|
||||
struct vk_raw_data_cache_object *nir_object = container_of(object, struct vk_raw_data_cache_object, base);
|
||||
blob_reader_init(&blob, nir_object->data, nir_object->data_size);
|
||||
|
|
@ -626,7 +611,7 @@ radv_pipeline_cache_handle_to_nir(struct radv_device *device, struct vk_pipeline
|
|||
ralloc_free(nir);
|
||||
return NULL;
|
||||
}
|
||||
nir->options = &pdev->nir_options[nir->info.stage];
|
||||
nir->options = &compiler_info->nir_options[nir->info.stage];
|
||||
|
||||
return nir;
|
||||
}
|
||||
|
|
@ -652,7 +637,7 @@ radv_pipeline_cache_nir_to_handle(struct radv_device *device, struct vk_pipeline
|
|||
blob_finish_get_buffer(&blob, &data, &size);
|
||||
struct vk_pipeline_cache_object *object;
|
||||
|
||||
if (cached && !radv_is_cache_disabled(device, cache)) {
|
||||
if (cached && !radv_is_cache_disabled(&device->compiler_info, cache)) {
|
||||
object = vk_pipeline_cache_create_and_insert_object(cache, blake3, BLAKE3_KEY_LEN, data, size,
|
||||
&vk_raw_data_cache_object_ops);
|
||||
} else {
|
||||
|
|
@ -675,7 +660,7 @@ radv_pipeline_cache_get_binaries(struct radv_device *device, const VkAllocationC
|
|||
|
||||
*found_in_internal_cache = false;
|
||||
|
||||
if (radv_is_cache_disabled(device, cache))
|
||||
if (radv_is_cache_disabled(&device->compiler_info, cache))
|
||||
return VK_SUCCESS;
|
||||
|
||||
struct vk_pipeline_cache_object *object =
|
||||
|
|
@ -711,7 +696,7 @@ radv_pipeline_cache_get_binaries(struct radv_device *device, const VkAllocationC
|
|||
shader = pipeline_obj->shaders[idx++];
|
||||
|
||||
if (data->is_library)
|
||||
nir = radv_pipeline_cache_lookup_nir_handle(device, cache, data->stages[i].blake3);
|
||||
nir = radv_pipeline_cache_lookup_nir_handle(&device->compiler_info, cache, data->stages[i].blake3);
|
||||
|
||||
result = radv_create_pipeline_binary_from_rt_shader(device, pAllocator, shader, false, data->stages[i].blake3,
|
||||
&stage_data->info, stage_data->stack_size, nir,
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ struct radv_shader_binary;
|
|||
struct radv_shader_debug_info;
|
||||
struct radv_shader_stage;
|
||||
struct radv_spirv_to_nir_options;
|
||||
struct radv_compiler_info;
|
||||
struct util_dynarray;
|
||||
struct nir_shader;
|
||||
typedef struct nir_shader nir_shader;
|
||||
|
|
@ -55,17 +56,18 @@ void radv_ray_tracing_pipeline_cache_insert(struct radv_device *device, struct v
|
|||
struct radv_ray_tracing_pipeline *pipeline, unsigned num_stages,
|
||||
unsigned num_groups);
|
||||
|
||||
nir_shader *radv_pipeline_cache_lookup_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
mesa_shader_stage stage, const blake3_hash key);
|
||||
nir_shader *radv_pipeline_cache_lookup_nir(const struct radv_compiler_info *compiler_info,
|
||||
struct vk_pipeline_cache *cache, mesa_shader_stage stage,
|
||||
const blake3_hash key);
|
||||
|
||||
void radv_pipeline_cache_insert_nir(struct radv_device *device, struct vk_pipeline_cache *cache, const blake3_hash key,
|
||||
const nir_shader *nir);
|
||||
void radv_pipeline_cache_insert_nir(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
|
||||
const blake3_hash key, const nir_shader *nir);
|
||||
|
||||
struct vk_pipeline_cache_object *radv_pipeline_cache_lookup_nir_handle(struct radv_device *device,
|
||||
struct vk_pipeline_cache_object *radv_pipeline_cache_lookup_nir_handle(const struct radv_compiler_info *compiler_info,
|
||||
struct vk_pipeline_cache *cache,
|
||||
const unsigned char *blake3);
|
||||
|
||||
struct nir_shader *radv_pipeline_cache_handle_to_nir(struct radv_device *device,
|
||||
struct nir_shader *radv_pipeline_cache_handle_to_nir(const struct radv_compiler_info *compiler_info,
|
||||
struct vk_pipeline_cache_object *object);
|
||||
|
||||
struct vk_pipeline_cache_object *radv_pipeline_cache_nir_to_handle(struct radv_device *device,
|
||||
|
|
|
|||
|
|
@ -98,35 +98,35 @@ radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct
|
|||
}
|
||||
|
||||
struct radv_shader_binary *
|
||||
radv_compile_cs(struct radv_device *device, struct radv_shader_stage *cs_stage, bool keep_executable_info,
|
||||
bool keep_statistic_info, bool is_internal, struct radv_shader_debug_info *dbg)
|
||||
radv_compile_cs(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *cs_stage,
|
||||
bool keep_executable_info, bool keep_statistic_info, bool is_internal,
|
||||
struct radv_shader_debug_info *dbg)
|
||||
{
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
/* Compile SPIR-V shader to NIR. */
|
||||
cs_stage->nir = radv_shader_spirv_to_nir(device, cs_stage, NULL, is_internal);
|
||||
cs_stage->nir = radv_shader_spirv_to_nir(compiler_info, cs_stage, NULL, is_internal);
|
||||
|
||||
radv_optimize_nir(cs_stage->nir, cs_stage->key.optimisations_disabled);
|
||||
|
||||
/* Run the shader info pass. */
|
||||
radv_nir_shader_info_init(cs_stage->stage, MESA_SHADER_NONE, &cs_stage->info);
|
||||
radv_nir_shader_info_pass(device, cs_stage->nir, &cs_stage->layout, &cs_stage->key, NULL, RADV_PIPELINE_COMPUTE,
|
||||
false, &cs_stage->info);
|
||||
radv_nir_shader_info_pass(compiler_info, cs_stage->nir, &cs_stage->layout, &cs_stage->key, NULL,
|
||||
RADV_PIPELINE_COMPUTE, false, &cs_stage->info);
|
||||
|
||||
radv_declare_shader_args(device, NULL, &cs_stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE, &cs_stage->args, dbg);
|
||||
radv_declare_shader_args(compiler_info, NULL, &cs_stage->info, MESA_SHADER_COMPUTE, MESA_SHADER_NONE,
|
||||
&cs_stage->args, dbg);
|
||||
|
||||
cs_stage->info.user_sgprs_locs = cs_stage->args.user_sgprs_locs;
|
||||
cs_stage->info.inline_push_constant_mask = cs_stage->args.ac.inline_push_const_mask;
|
||||
|
||||
/* Postprocess NIR. */
|
||||
radv_postprocess_nir(device, NULL, cs_stage);
|
||||
radv_postprocess_nir(compiler_info, NULL, cs_stage);
|
||||
|
||||
dbg->dump_shader = radv_can_dump_shader(device, cs_stage->nir);
|
||||
bool dump_nir = dbg->dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR);
|
||||
dbg->dump_shader = radv_can_dump_shader(compiler_info, cs_stage->nir);
|
||||
bool dump_nir = dbg->dump_shader && compiler_info->debug.dump_nir;
|
||||
|
||||
if (dbg->dump_shader) {
|
||||
simple_mtx_lock(&instance->shader_dump_mtx);
|
||||
simple_mtx_lock(compiler_info->debug.shader_dump_mtx);
|
||||
|
||||
if (dump_nir) {
|
||||
nir_print_shader(cs_stage->nir, stderr);
|
||||
|
|
@ -134,23 +134,23 @@ radv_compile_cs(struct radv_device *device, struct radv_shader_stage *cs_stage,
|
|||
}
|
||||
|
||||
/* Compile NIR shader to AMD assembly. */
|
||||
struct radv_shader_binary *cs_binary =
|
||||
radv_shader_nir_to_asm(device, cs_stage, &cs_stage->nir, 1, NULL, keep_executable_info, keep_statistic_info);
|
||||
struct radv_shader_binary *cs_binary = radv_shader_nir_to_asm(compiler_info, cs_stage, &cs_stage->nir, 1, NULL,
|
||||
keep_executable_info, keep_statistic_info);
|
||||
|
||||
/* Dump NIR after nir_to_asm, because ACO modifies it. */
|
||||
char *nir_string = NULL;
|
||||
if (keep_executable_info || dbg->dump_shader)
|
||||
nir_string = radv_dump_nir_shaders(instance, &cs_stage->nir, 1);
|
||||
nir_string = radv_dump_nir_shaders(compiler_info, &cs_stage->nir, 1);
|
||||
|
||||
radv_parse_binary_debug_info(device, cs_binary, dbg);
|
||||
radv_parse_binary_debug_info(compiler_info, cs_binary, dbg);
|
||||
|
||||
dbg->nir_string = nir_string;
|
||||
dbg->stages = 1 << MESA_SHADER_COMPUTE;
|
||||
|
||||
radv_shader_dump_asm(device, dbg, &cs_stage->info);
|
||||
radv_shader_dump_asm(compiler_info, dbg, &cs_stage->info);
|
||||
|
||||
if (dbg->dump_shader)
|
||||
simple_mtx_unlock(&instance->shader_dump_mtx);
|
||||
simple_mtx_unlock(compiler_info->debug.shader_dump_mtx);
|
||||
|
||||
if (keep_executable_info && cs_stage->spirv.size) {
|
||||
dbg->spirv = malloc(cs_stage->spirv.size);
|
||||
|
|
@ -185,8 +185,9 @@ radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, st
|
|||
struct vk_pipeline_cache *cache, const VkPipelineShaderStageCreateInfo *pStage,
|
||||
const VkPipelineCreationFeedbackCreateInfo *creation_feedback)
|
||||
{
|
||||
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags);
|
||||
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags);
|
||||
const struct radv_compiler_info *compiler_info = &device->compiler_info;
|
||||
bool keep_executable_info = radv_pipeline_capture_shaders(compiler_info, pipeline->base.create_flags);
|
||||
bool keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, pipeline->base.create_flags);
|
||||
const bool skip_shaders_cache = radv_pipeline_skip_shaders_cache(device, &pipeline->base);
|
||||
struct radv_shader_stage cs_stage = {0};
|
||||
VkPipelineCreationFeedback pipeline_feedback = {
|
||||
|
|
@ -220,8 +221,8 @@ radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, st
|
|||
radv_pipeline_stage_init(pipeline->base.create_flags, pStage, pipeline_layout, &stage_key, &cs_stage);
|
||||
|
||||
struct radv_shader_debug_info cs_dbg = {0};
|
||||
struct radv_shader_binary *cs_binary = radv_compile_cs(device, &cs_stage, keep_executable_info, keep_statistic_info,
|
||||
pipeline->base.is_internal, &cs_dbg);
|
||||
struct radv_shader_binary *cs_binary = radv_compile_cs(compiler_info, &cs_stage, keep_executable_info,
|
||||
keep_statistic_info, pipeline->base.is_internal, &cs_dbg);
|
||||
pipeline->base.shaders[MESA_SHADER_COMPUTE] =
|
||||
radv_shader_create(device, cache, cs_binary, skip_shaders_cache, &cs_dbg);
|
||||
|
||||
|
|
@ -232,7 +233,7 @@ radv_compute_pipeline_compile(const VkComputePipelineCreateInfo *pCreateInfo, st
|
|||
}
|
||||
|
||||
free(cs_binary);
|
||||
if (radv_can_dump_shader_stats(device, cs_stage.nir)) {
|
||||
if (radv_can_dump_shader_stats(&device->compiler_info, cs_stage.nir)) {
|
||||
radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[MESA_SHADER_COMPUTE], stderr);
|
||||
}
|
||||
radv_pipeline_stage_finish(&cs_stage);
|
||||
|
|
|
|||
|
|
@ -42,8 +42,9 @@ void radv_get_compute_shader_metadata(const struct radv_device *device, const st
|
|||
void radv_compute_pipeline_init(struct radv_compute_pipeline *pipeline, const struct radv_pipeline_layout *layout,
|
||||
struct radv_shader *shader);
|
||||
|
||||
struct radv_shader_binary *radv_compile_cs(struct radv_device *device, struct radv_shader_stage *cs_stage,
|
||||
bool keep_executable_info, bool keep_statistic_info, bool is_internal,
|
||||
struct radv_shader_binary *radv_compile_cs(const struct radv_compiler_info *compiler_info,
|
||||
struct radv_shader_stage *cs_stage, bool keep_executable_info,
|
||||
bool keep_statistic_info, bool is_internal,
|
||||
struct radv_shader_debug_info *dbg);
|
||||
|
||||
VkResult radv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache,
|
||||
|
|
|
|||
|
|
@ -98,18 +98,17 @@ radv_blend_remove_dst(VkBlendOp *func, VkBlendFactor *src_factor, VkBlendFactor
|
|||
}
|
||||
|
||||
static unsigned
|
||||
radv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, bool blend_enable,
|
||||
radv_choose_spi_color_format(const struct radv_compiler_info *compiler_info, VkFormat vk_format, bool blend_enable,
|
||||
bool blend_need_alpha)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct util_format_description *desc = radv_format_description(vk_format);
|
||||
bool use_rbplus = pdev->info.rbplus_allowed;
|
||||
bool use_rbplus = compiler_info->hw.rbplus_allowed;
|
||||
struct ac_spi_color_formats formats = {0};
|
||||
unsigned format, ntype, swap;
|
||||
|
||||
format = ac_get_cb_format(pdev->info.gfx_level, desc->format);
|
||||
format = ac_get_cb_format(compiler_info->ac->gfx_level, desc->format);
|
||||
ntype = ac_get_cb_number_type(desc->format);
|
||||
swap = ac_translate_colorswap(pdev->info.gfx_level, desc->format, false);
|
||||
swap = ac_translate_colorswap(compiler_info->ac->gfx_level, desc->format, false);
|
||||
|
||||
ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats);
|
||||
|
||||
|
|
@ -1464,9 +1463,8 @@ radv_graphics_shaders_link_varyings(struct radv_shader_stage *stages, enum amd_g
|
|||
}
|
||||
|
||||
struct radv_ps_epilog_key
|
||||
radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_ps_epilog_state *state)
|
||||
radv_generate_ps_epilog_key(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_state *state)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0, z_format = 0, no_signed_zero = 0;
|
||||
struct radv_ps_epilog_key key;
|
||||
|
||||
|
|
@ -1484,7 +1482,7 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
|
|||
} else {
|
||||
const bool blend_enable = (state->color_blend_enable >> i) & 0x1u;
|
||||
|
||||
cf = radv_choose_spi_color_format(device, fmt, blend_enable, state->need_src_alpha & (1 << i));
|
||||
cf = radv_choose_spi_color_format(compiler_info, fmt, blend_enable, state->need_src_alpha & (1 << i));
|
||||
|
||||
uint32_t comp_used = util_format_colormask(vk_format_description(fmt));
|
||||
|
||||
|
|
@ -1530,9 +1528,9 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
|
|||
state->alpha_to_coverage_via_mrtz);
|
||||
|
||||
key.spi_shader_col_format = col_format;
|
||||
key.color_is_int8 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int8 : 0;
|
||||
key.color_is_int10 = pdev->info.compiler_info.has_cb_lt16bit_int_clamp_bug ? is_int10 : 0;
|
||||
key.enable_mrt_output_nan_fixup = pdev->cache_key.enable_mrt_output_nan_fixup ? is_float32 : 0;
|
||||
key.color_is_int8 = compiler_info->ac->has_cb_lt16bit_int_clamp_bug ? is_int8 : 0;
|
||||
key.color_is_int10 = compiler_info->ac->has_cb_lt16bit_int_clamp_bug ? is_int10 : 0;
|
||||
key.enable_mrt_output_nan_fixup = compiler_info->cache_key->enable_mrt_output_nan_fixup ? is_float32 : 0;
|
||||
key.no_signed_zero = no_signed_zero;
|
||||
key.colors_written = state->colors_written;
|
||||
key.mrt0_is_dual_src = state->mrt0_is_dual_src && key.colors_needed & 0xf;
|
||||
|
|
@ -1547,7 +1545,8 @@ radv_generate_ps_epilog_key(const struct radv_device *device, const struct radv_
|
|||
}
|
||||
|
||||
static struct radv_ps_epilog_key
|
||||
radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state)
|
||||
radv_pipeline_generate_ps_epilog_key(const struct radv_compiler_info *compiler_info,
|
||||
const struct vk_graphics_pipeline_state *state)
|
||||
{
|
||||
struct radv_ps_epilog_state ps_epilog = {0};
|
||||
|
||||
|
|
@ -1605,14 +1604,14 @@ radv_pipeline_generate_ps_epilog_key(const struct radv_device *device, const str
|
|||
ps_epilog.color_attachment_mappings[i] = state->cal ? state->cal->color_map[i] : i;
|
||||
}
|
||||
|
||||
return radv_generate_ps_epilog_key(device, &ps_epilog);
|
||||
return radv_generate_ps_epilog_key(compiler_info, &ps_epilog);
|
||||
}
|
||||
|
||||
static struct radv_graphics_state_key
|
||||
radv_generate_graphics_state_key(const struct radv_device *device, const struct vk_graphics_pipeline_state *state,
|
||||
radv_generate_graphics_state_key(const struct radv_compiler_info *compiler_info,
|
||||
const struct vk_graphics_pipeline_state *state,
|
||||
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags, uint32_t custom_blend_mode)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_graphics_state_key key;
|
||||
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
|
@ -1646,7 +1645,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
|
|||
key.vi.instance_rate_divisors[i] = state->vi->bindings[binding].divisor;
|
||||
|
||||
/* vertex_attribute_strides is only needed to workaround GFX6/7 offset>=stride checks. */
|
||||
if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && pdev->info.gfx_level < GFX8) {
|
||||
if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_VI_BINDING_STRIDES) && compiler_info->ac->gfx_level < GFX8) {
|
||||
/* From the Vulkan spec 1.2.157:
|
||||
*
|
||||
* "If the bound pipeline state object was created with the
|
||||
|
|
@ -1666,7 +1665,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
|
|||
}
|
||||
|
||||
const struct ac_vtx_format_info *vtx_info = ac_get_vtx_format_info(
|
||||
pdev->info.gfx_level, pdev->info.compiler_info.has_vtx_format_alpha_adjust_bug, format);
|
||||
compiler_info->ac->gfx_level, compiler_info->ac->has_vtx_format_alpha_adjust_bug, format);
|
||||
unsigned attrib_align = vtx_info->chan_byte_size ? vtx_info->chan_byte_size : vtx_info->element_size;
|
||||
|
||||
/* If offset is misaligned, then the buffer offset must be too. Just skip updating
|
||||
|
|
@ -1691,7 +1690,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
|
|||
* alpha-to-one is enabled (alpha to MRTZ.a and one to MRT0.a).
|
||||
*/
|
||||
key.ms.alpha_to_coverage_via_mrtz =
|
||||
alpha_to_coverage_enabled && (pdev->info.gfx_level >= GFX11 || alpha_to_one_enabled);
|
||||
alpha_to_coverage_enabled && (compiler_info->ac->gfx_level >= GFX11 || alpha_to_one_enabled);
|
||||
|
||||
if (state->ms) {
|
||||
key.ms.sample_shading_enable = state->ms->sample_shading_enable;
|
||||
|
|
@ -1711,29 +1710,29 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
|
|||
}
|
||||
|
||||
if (state->rs) {
|
||||
if (pdev->info.gfx_level >= GFX10)
|
||||
if (compiler_info->ac->gfx_level >= GFX10)
|
||||
key.rs.provoking_vtx_last = state->rs->provoking_vertex == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;
|
||||
|
||||
if (!BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_RS_CULL_MODE))
|
||||
key.rs.cull_mode = state->rs->cull_mode;
|
||||
}
|
||||
|
||||
key.ps.force_vrs_enabled = device->force_vrs_enabled && !radv_is_static_vrs_enabled(state);
|
||||
key.ps.force_vrs_enabled = compiler_info->force_vrs_enabled && !radv_is_static_vrs_enabled(state);
|
||||
|
||||
if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) && pdev->info.compiler_info.has_vrs_frag_pos_z_bug)
|
||||
if ((radv_is_vrs_enabled(state) || key.ps.force_vrs_enabled) && compiler_info->ac->has_vrs_frag_pos_z_bug)
|
||||
key.adjust_frag_coord_z = true;
|
||||
|
||||
if (radv_pipeline_needs_ps_epilog(state, lib_flags))
|
||||
key.ps.has_epilog = true;
|
||||
|
||||
key.ps.epilog = radv_pipeline_generate_ps_epilog_key(device, state);
|
||||
key.ps.epilog = radv_pipeline_generate_ps_epilog_key(compiler_info, state);
|
||||
|
||||
/* Alpha to coverage is exported via MRTZ when depth/stencil/samplemask are also exported.
|
||||
* Though, when a PS epilog is needed and the MS state is NULL (with dynamic rendering), it's not
|
||||
* possible to know the info at compile time and MRTZ needs to be exported in the epilog.
|
||||
*/
|
||||
if (key.ps.has_epilog) {
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
if (compiler_info->ac->gfx_level >= GFX11) {
|
||||
key.ps.exports_mrtz_via_epilog = alpha_to_coverage_unknown;
|
||||
} else {
|
||||
key.ps.exports_mrtz_via_epilog =
|
||||
|
|
@ -1744,7 +1743,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
|
|||
key.dynamic_rasterization_samples = BITSET_TEST(state->dynamic, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
|
||||
(!!(state->shader_stages & VK_SHADER_STAGE_FRAGMENT_BIT) && !state->ms);
|
||||
|
||||
if (pdev->use_ngg) {
|
||||
if (compiler_info->use_ngg) {
|
||||
VkShaderStageFlags ngg_stage;
|
||||
|
||||
if (state->shader_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
|
||||
|
|
@ -1767,7 +1766,7 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
|
|||
key.enable_remove_point_size = true;
|
||||
}
|
||||
|
||||
if (device->vk.enabled_features.smoothLines) {
|
||||
if (compiler_info->smooth_lines) {
|
||||
/* Make the line rasterization mode dynamic for smooth lines to conditionally enable the lowering at draw time.
|
||||
* This is because it's not possible to know if the graphics pipeline will draw lines at this point and it also
|
||||
* simplifies the implementation.
|
||||
|
|
@ -1784,7 +1783,8 @@ radv_generate_graphics_state_key(const struct radv_device *device, const struct
|
|||
!(lib_flags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT);
|
||||
}
|
||||
|
||||
key.dcc_decompress_gfx11 = pdev->info.gfx_level >= GFX11 && custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11;
|
||||
key.dcc_decompress_gfx11 =
|
||||
compiler_info->ac->gfx_level >= GFX11 && custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11;
|
||||
|
||||
return key;
|
||||
}
|
||||
|
|
@ -1794,6 +1794,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGr
|
|||
const struct vk_graphics_pipeline_state *state,
|
||||
VkGraphicsPipelineLibraryFlagBitsEXT lib_flags)
|
||||
{
|
||||
const struct radv_compiler_info *compiler_info = &device->compiler_info;
|
||||
VkPipelineCreateFlags2 create_flags = vk_graphics_pipeline_create_flags(pCreateInfo);
|
||||
struct radv_graphics_pipeline_key key = {0};
|
||||
uint32_t custom_blend_mode = 0;
|
||||
|
|
@ -1804,7 +1805,7 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGr
|
|||
custom_blend_mode = radv_info->custom_blend_mode;
|
||||
}
|
||||
|
||||
key.gfx_state = radv_generate_graphics_state_key(device, state, lib_flags, custom_blend_mode);
|
||||
key.gfx_state = radv_generate_graphics_state_key(compiler_info, state, lib_flags, custom_blend_mode);
|
||||
|
||||
for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
|
||||
const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->pStages[i];
|
||||
|
|
@ -1820,12 +1821,10 @@ radv_generate_graphics_pipeline_key(const struct radv_device *device, const VkGr
|
|||
}
|
||||
|
||||
static void
|
||||
radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *stages,
|
||||
radv_fill_shader_info_ngg(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stages,
|
||||
VkShaderStageFlagBits active_nir_stages)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (!pdev->cache_key.use_ngg)
|
||||
if (!compiler_info->cache_key->use_ngg)
|
||||
return;
|
||||
|
||||
if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.next_stage != MESA_SHADER_TESS_CTRL) {
|
||||
|
|
@ -1836,7 +1835,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *
|
|||
stages[MESA_SHADER_MESH].info.is_ngg = true;
|
||||
}
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11) {
|
||||
if (compiler_info->ac->gfx_level >= GFX11) {
|
||||
if (stages[MESA_SHADER_GEOMETRY].nir)
|
||||
stages[MESA_SHADER_GEOMETRY].info.is_ngg = true;
|
||||
} else {
|
||||
|
|
@ -1861,7 +1860,7 @@ radv_fill_shader_info_ngg(struct radv_device *device, struct radv_shader_stage *
|
|||
}
|
||||
|
||||
if ((last_vgt_stage && last_vgt_stage->nir->xfb_info) ||
|
||||
(pdev->cache_key.no_ngg_gs && stages[MESA_SHADER_GEOMETRY].nir)) {
|
||||
(compiler_info->cache_key->no_ngg_gs && stages[MESA_SHADER_GEOMETRY].nir)) {
|
||||
/* NGG needs to be disabled on GFX10/GFX10.3 when:
|
||||
* - streamout is used because NGG streamout isn't supported
|
||||
* - NGG GS is explictly disabled to workaround performance issues
|
||||
|
|
@ -1980,7 +1979,7 @@ radv_get_next_stage(mesa_shader_stage stage, VkShaderStageFlagBits active_nir_st
|
|||
}
|
||||
|
||||
static void
|
||||
radv_fill_shader_info(struct radv_device *device, const enum radv_pipeline_type pipeline_type,
|
||||
radv_fill_shader_info(const struct radv_compiler_info *compiler_info, const enum radv_pipeline_type pipeline_type,
|
||||
const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages,
|
||||
VkShaderStageFlagBits active_nir_stages)
|
||||
{
|
||||
|
|
@ -1991,23 +1990,22 @@ radv_fill_shader_info(struct radv_device *device, const enum radv_pipeline_type
|
|||
consider_force_vrs = radv_consider_force_vrs(gfx_state, &stages[i], &stages[MESA_SHADER_FRAGMENT]);
|
||||
}
|
||||
|
||||
radv_nir_shader_info_pass(device, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state, pipeline_type,
|
||||
consider_force_vrs, &stages[i].info);
|
||||
radv_nir_shader_info_pass(compiler_info, stages[i].nir, &stages[i].layout, &stages[i].key, gfx_state,
|
||||
pipeline_type, consider_force_vrs, &stages[i].info);
|
||||
}
|
||||
|
||||
radv_nir_shader_info_link(device, gfx_state, stages);
|
||||
radv_nir_shader_info_link(compiler_info, gfx_state, stages);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage *stages,
|
||||
radv_declare_pipeline_args(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stages,
|
||||
const struct radv_graphics_state_key *gfx_state, VkShaderStageFlagBits active_nir_stages,
|
||||
struct radv_shader_debug_info *debug)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
|
||||
|
||||
if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) {
|
||||
radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
|
||||
radv_declare_shader_args(compiler_info, gfx_state, &stages[MESA_SHADER_TESS_CTRL].info, MESA_SHADER_TESS_CTRL,
|
||||
MESA_SHADER_VERTEX, &stages[MESA_SHADER_TESS_CTRL].args, &debug[MESA_SHADER_TESS_CTRL]);
|
||||
stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs;
|
||||
stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask =
|
||||
|
|
@ -2024,8 +2022,8 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage
|
|||
|
||||
if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) {
|
||||
mesa_shader_stage pre_stage = stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
|
||||
radv_declare_shader_args(device, gfx_state, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY, pre_stage,
|
||||
&stages[MESA_SHADER_GEOMETRY].args, &debug[MESA_SHADER_GEOMETRY]);
|
||||
radv_declare_shader_args(compiler_info, gfx_state, &stages[MESA_SHADER_GEOMETRY].info, MESA_SHADER_GEOMETRY,
|
||||
pre_stage, &stages[MESA_SHADER_GEOMETRY].args, &debug[MESA_SHADER_GEOMETRY]);
|
||||
stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs;
|
||||
stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask =
|
||||
stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask;
|
||||
|
|
@ -2038,20 +2036,19 @@ radv_declare_pipeline_args(struct radv_device *device, struct radv_shader_stage
|
|||
}
|
||||
|
||||
u_foreach_bit (i, active_nir_stages) {
|
||||
radv_declare_shader_args(device, gfx_state, &stages[i].info, i, MESA_SHADER_NONE, &stages[i].args, &debug[i]);
|
||||
radv_declare_shader_args(compiler_info, gfx_state, &stages[i].info, i, MESA_SHADER_NONE, &stages[i].args,
|
||||
&debug[i]);
|
||||
stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs;
|
||||
stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask;
|
||||
}
|
||||
}
|
||||
|
||||
static struct radv_shader_binary *
|
||||
radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
radv_create_gs_copy_shader(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
|
||||
struct radv_shader_stage *gs_stage, const struct radv_graphics_state_key *gfx_state,
|
||||
bool keep_executable_info, bool keep_statistic_info,
|
||||
struct radv_shader_debug_info *gs_copy_debug)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
const struct radv_shader_info *gs_info = &gs_stage->info;
|
||||
nir_shader *nir = gs_stage->gs_copy_shader;
|
||||
|
|
@ -2068,8 +2065,8 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
|
|||
},
|
||||
};
|
||||
radv_nir_shader_info_init(gs_copy_stage.stage, MESA_SHADER_FRAGMENT, &gs_copy_stage.info);
|
||||
radv_nir_shader_info_pass(device, nir, &gs_stage->layout, &gs_stage->key, gfx_state, RADV_PIPELINE_GRAPHICS, false,
|
||||
&gs_copy_stage.info);
|
||||
radv_nir_shader_info_pass(compiler_info, nir, &gs_stage->layout, &gs_stage->key, gfx_state, RADV_PIPELINE_GRAPHICS,
|
||||
false, &gs_copy_stage.info);
|
||||
gs_copy_stage.info.wave_size = 64; /* Wave32 not supported. */
|
||||
gs_copy_stage.info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */
|
||||
gs_copy_stage.info.so = gs_info->so;
|
||||
|
|
@ -2077,59 +2074,56 @@ radv_create_gs_copy_shader(struct radv_device *device, struct vk_pipeline_cache
|
|||
gs_copy_stage.info.force_vrs_per_vertex = gs_info->force_vrs_per_vertex;
|
||||
gs_copy_stage.info.type = RADV_SHADER_TYPE_GS_COPY;
|
||||
|
||||
radv_declare_shader_args(device, gfx_state, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE,
|
||||
radv_declare_shader_args(compiler_info, gfx_state, &gs_copy_stage.info, MESA_SHADER_VERTEX, MESA_SHADER_NONE,
|
||||
&gs_copy_stage.args, gs_copy_debug);
|
||||
gs_copy_stage.info.user_sgprs_locs = gs_copy_stage.args.user_sgprs_locs;
|
||||
gs_copy_stage.info.inline_push_constant_mask = gs_copy_stage.args.ac.inline_push_const_mask;
|
||||
|
||||
NIR_PASS(
|
||||
_, nir, ac_nir_lower_intrinsics_to_args, &gs_copy_stage.args.ac,
|
||||
&(ac_nir_lower_intrinsics_to_args_options){.gfx_level = pdev->info.gfx_level,
|
||||
.has_ls_vgpr_init_bug = pdev->info.compiler_info.has_ls_vgpr_init_bug,
|
||||
&(ac_nir_lower_intrinsics_to_args_options){.gfx_level = compiler_info->ac->gfx_level,
|
||||
.has_ls_vgpr_init_bug = compiler_info->ac->has_ls_vgpr_init_bug,
|
||||
.hw_stage = AC_HW_VERTEX_SHADER,
|
||||
.wave_size = 64,
|
||||
.workgroup_size = 64,
|
||||
.use_llvm = pdev->use_llvm});
|
||||
NIR_PASS(_, nir, radv_nir_lower_abi, pdev->info.gfx_level, &gs_copy_stage, gfx_state, pdev->info.address32_hi);
|
||||
.use_llvm = compiler_info->debug.use_llvm});
|
||||
NIR_PASS(_, nir, radv_nir_lower_abi, compiler_info->ac->gfx_level, &gs_copy_stage, gfx_state, compiler_info->hw.address32_hi);
|
||||
|
||||
NIR_PASS(_, nir, ac_nir_lower_global_access);
|
||||
NIR_PASS(_, nir, nir_lower_int64);
|
||||
|
||||
struct radv_graphics_pipeline_key key = {0};
|
||||
gs_copy_debug->dump_shader = radv_can_dump_shader(device, nir);
|
||||
gs_copy_debug->dump_shader = radv_can_dump_shader(compiler_info, nir);
|
||||
|
||||
if (gs_copy_debug->dump_shader)
|
||||
simple_mtx_lock(&instance->shader_dump_mtx);
|
||||
simple_mtx_lock(compiler_info->debug.shader_dump_mtx);
|
||||
|
||||
struct radv_shader_binary *gs_copy_binary = radv_shader_nir_to_asm(device, &gs_copy_stage, &nir, 1, &key.gfx_state,
|
||||
keep_executable_info, keep_statistic_info);
|
||||
struct radv_shader_binary *gs_copy_binary = radv_shader_nir_to_asm(
|
||||
compiler_info, &gs_copy_stage, &nir, 1, &key.gfx_state, keep_executable_info, keep_statistic_info);
|
||||
|
||||
char *nir_string = NULL;
|
||||
if (keep_executable_info || gs_copy_debug->dump_shader)
|
||||
nir_string = radv_dump_nir_shaders(instance, &nir, 1);
|
||||
nir_string = radv_dump_nir_shaders(compiler_info, &nir, 1);
|
||||
|
||||
radv_parse_binary_debug_info(device, gs_copy_binary, gs_copy_debug);
|
||||
radv_parse_binary_debug_info(compiler_info, gs_copy_binary, gs_copy_debug);
|
||||
gs_copy_debug->nir_string = nir_string;
|
||||
gs_copy_debug->stages = 1 << MESA_SHADER_VERTEX;
|
||||
radv_shader_dump_asm(device, gs_copy_debug, &gs_copy_stage.info);
|
||||
radv_shader_dump_asm(compiler_info, gs_copy_debug, &gs_copy_stage.info);
|
||||
|
||||
if (gs_copy_debug->dump_shader)
|
||||
simple_mtx_unlock(&instance->shader_dump_mtx);
|
||||
simple_mtx_unlock(compiler_info->debug.shader_dump_mtx);
|
||||
|
||||
return gs_copy_binary;
|
||||
}
|
||||
|
||||
static void
|
||||
radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
radv_graphics_shaders_nir_to_asm(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
|
||||
struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
|
||||
bool keep_executable_info, bool keep_statistic_info,
|
||||
VkShaderStageFlagBits active_nir_stages, struct radv_shader_debug_info *debug,
|
||||
struct radv_shader_binary **binaries, struct radv_shader_debug_info *gs_copy_debug,
|
||||
struct radv_shader_binary **gs_copy_binary)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) {
|
||||
if (!(active_nir_stages & (1 << s)))
|
||||
continue;
|
||||
|
|
@ -2138,7 +2132,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
|
|||
unsigned shader_count = 1;
|
||||
|
||||
/* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. */
|
||||
if (pdev->info.gfx_level >= GFX9 &&
|
||||
if (compiler_info->ac->gfx_level >= GFX9 &&
|
||||
((s == MESA_SHADER_GEOMETRY &&
|
||||
(active_nir_stages & (VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT))) ||
|
||||
(s == MESA_SHADER_TESS_CTRL && (active_nir_stages & VK_SHADER_STAGE_VERTEX_BIT)))) {
|
||||
|
|
@ -2158,12 +2152,12 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
|
|||
int64_t stage_start = os_time_get_nano();
|
||||
|
||||
for (unsigned i = 0; i < shader_count; ++i)
|
||||
debug[s].dump_shader |= radv_can_dump_shader(device, nir_shaders[i]);
|
||||
debug[s].dump_shader |= radv_can_dump_shader(compiler_info, nir_shaders[i]);
|
||||
|
||||
bool dump_nir = debug[s].dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR);
|
||||
bool dump_nir = debug[s].dump_shader && compiler_info->debug.dump_nir;
|
||||
|
||||
if (debug[s].dump_shader) {
|
||||
simple_mtx_lock(&instance->shader_dump_mtx);
|
||||
simple_mtx_lock(compiler_info->debug.shader_dump_mtx);
|
||||
|
||||
if (dump_nir) {
|
||||
for (uint32_t i = 0; i < shader_count; i++)
|
||||
|
|
@ -2171,23 +2165,23 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
|
|||
}
|
||||
}
|
||||
|
||||
binaries[s] = radv_shader_nir_to_asm(device, &stages[s], nir_shaders, shader_count, gfx_state,
|
||||
binaries[s] = radv_shader_nir_to_asm(compiler_info, &stages[s], nir_shaders, shader_count, gfx_state,
|
||||
keep_executable_info, keep_statistic_info);
|
||||
|
||||
/* Dump NIR after nir_to_asm, because ACO modifies it. */
|
||||
char *nir_string = NULL;
|
||||
if (keep_executable_info || debug[s].dump_shader)
|
||||
nir_string = radv_dump_nir_shaders(instance, nir_shaders, shader_count);
|
||||
nir_string = radv_dump_nir_shaders(compiler_info, nir_shaders, shader_count);
|
||||
|
||||
radv_parse_binary_debug_info(device, binaries[s], &debug[s]);
|
||||
radv_parse_binary_debug_info(compiler_info, binaries[s], &debug[s]);
|
||||
debug[s].nir_string = nir_string;
|
||||
for (uint32_t i = 0; i < shader_count; i++)
|
||||
debug[s].stages |= 1 << nir_shaders[i]->info.stage;
|
||||
|
||||
radv_shader_dump_asm(device, &debug[s], &stages[s].info);
|
||||
radv_shader_dump_asm(compiler_info, &debug[s], &stages[s].info);
|
||||
|
||||
if (debug[s].dump_shader)
|
||||
simple_mtx_unlock(&instance->shader_dump_mtx);
|
||||
simple_mtx_unlock(compiler_info->debug.shader_dump_mtx);
|
||||
|
||||
if (keep_executable_info && stages[s].spirv.size) {
|
||||
debug[s].spirv = malloc(stages[s].spirv.size);
|
||||
|
|
@ -2196,7 +2190,7 @@ radv_graphics_shaders_nir_to_asm(struct radv_device *device, struct vk_pipeline_
|
|||
}
|
||||
|
||||
if (s == MESA_SHADER_GEOMETRY && !stages[s].info.is_ngg) {
|
||||
*gs_copy_binary = radv_create_gs_copy_shader(device, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state,
|
||||
*gs_copy_binary = radv_create_gs_copy_shader(compiler_info, cache, &stages[MESA_SHADER_GEOMETRY], gfx_state,
|
||||
keep_executable_info, keep_statistic_info, gs_copy_debug);
|
||||
}
|
||||
|
||||
|
|
@ -2402,16 +2396,15 @@ radv_skip_graphics_pipeline_compile(const struct radv_device *device, const VkGr
|
|||
}
|
||||
|
||||
void
|
||||
radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
|
||||
struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
|
||||
bool keep_executable_info, bool keep_statistic_info, bool is_internal,
|
||||
struct radv_retained_shaders *retained_shaders, bool noop_fs,
|
||||
struct radv_shader_debug_info *debug, struct radv_shader_binary **binaries,
|
||||
struct radv_shader_debug_info *gs_copy_debug, struct radv_shader_binary **gs_copy_binary)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
const bool nir_cache = instance->perftest_flags & RADV_PERFTEST_NIR_CACHE;
|
||||
const bool nir_cache = compiler_info->enable_nir_cache;
|
||||
|
||||
for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) {
|
||||
if (stages[s].stage == MESA_SHADER_NONE)
|
||||
continue;
|
||||
|
|
@ -2428,12 +2421,12 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
|
||||
if (nir_cache) {
|
||||
radv_hash_graphics_spirv_to_nir(key, &stages[s], &options);
|
||||
stages[s].nir = radv_pipeline_cache_lookup_nir(device, cache, s, key);
|
||||
stages[s].nir = radv_pipeline_cache_lookup_nir(compiler_info, cache, s, key);
|
||||
}
|
||||
if (!stages[s].nir) {
|
||||
stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], &options, is_internal);
|
||||
stages[s].nir = radv_shader_spirv_to_nir(compiler_info, &stages[s], &options, is_internal);
|
||||
if (nir_cache)
|
||||
radv_pipeline_cache_insert_nir(device, cache, key, stages[s].nir);
|
||||
radv_pipeline_cache_insert_nir(compiler_info, cache, key, stages[s].nir);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2450,7 +2443,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
active_nir_stages |= mesa_to_vk_shader_stage(i);
|
||||
}
|
||||
|
||||
if (!pdev->info.mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
|
||||
if (!compiler_info->hw.mesh_fast_launch_2 && stages[MESA_SHADER_MESH].nir &&
|
||||
BITSET_TEST(stages[MESA_SHADER_MESH].nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID)) {
|
||||
nir_shader *mesh = stages[MESA_SHADER_MESH].nir;
|
||||
nir_shader *task = stages[MESA_SHADER_TASK].nir;
|
||||
|
|
@ -2482,7 +2475,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
}
|
||||
|
||||
/* Determine if shaders uses NGG before linking because it's needed for some NIR pass. */
|
||||
radv_fill_shader_info_ngg(device, stages, active_nir_stages);
|
||||
radv_fill_shader_info_ngg(compiler_info, stages, active_nir_stages);
|
||||
|
||||
if (stages[MESA_SHADER_GEOMETRY].nir) {
|
||||
unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream;
|
||||
|
|
@ -2624,7 +2617,7 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
}
|
||||
|
||||
/* Optimize varyings on lowered shader I/O (more efficient than optimizing I/O derefs). */
|
||||
radv_graphics_shaders_link_varyings(stages, pdev->info.gfx_level);
|
||||
radv_graphics_shaders_link_varyings(stages, compiler_info->ac->gfx_level);
|
||||
|
||||
/* Optimize constant clip/cull distance after linking to operate on scalar io in the last
|
||||
* pre raster stage.
|
||||
|
|
@ -2642,14 +2635,14 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
stages[i].feedback.duration += os_time_get_nano() - stage_start;
|
||||
}
|
||||
|
||||
radv_fill_shader_info(device, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
|
||||
radv_fill_shader_info(compiler_info, RADV_PIPELINE_GRAPHICS, gfx_state, stages, active_nir_stages);
|
||||
|
||||
radv_declare_pipeline_args(device, stages, gfx_state, active_nir_stages, debug);
|
||||
radv_declare_pipeline_args(compiler_info, stages, gfx_state, active_nir_stages, debug);
|
||||
|
||||
radv_foreach_stage (i, active_nir_stages) {
|
||||
int64_t stage_start = os_time_get_nano();
|
||||
|
||||
radv_postprocess_nir(device, gfx_state, &stages[i]);
|
||||
radv_postprocess_nir(compiler_info, gfx_state, &stages[i]);
|
||||
|
||||
stages[i].feedback.duration += os_time_get_nano() - stage_start;
|
||||
}
|
||||
|
|
@ -2661,16 +2654,16 @@ radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cac
|
|||
struct radv_shader_stage *stage = gs_stage ? gs_stage : es_stage;
|
||||
|
||||
if ((gs_stage ? gs_stage : es_stage)->info.is_ngg) {
|
||||
gfx10_get_ngg_info(device, &es_stage->info, gs_stage ? &gs_stage->info : NULL, &stage->info.ngg_info);
|
||||
gfx10_get_ngg_info(compiler_info, &es_stage->info, gs_stage ? &gs_stage->info : NULL, &stage->info.ngg_info);
|
||||
stage->info.nir_shared_size = stage->info.ngg_info.lds_size;
|
||||
}
|
||||
}
|
||||
|
||||
if (stages[MESA_SHADER_GEOMETRY].nir && !stages[MESA_SHADER_GEOMETRY].info.is_ngg)
|
||||
radv_get_legacy_gs_info(device, NULL, &stages[MESA_SHADER_GEOMETRY].info);
|
||||
radv_get_legacy_gs_info(compiler_info, NULL, &stages[MESA_SHADER_GEOMETRY].info);
|
||||
|
||||
/* Compile NIR shaders to AMD assembly. */
|
||||
radv_graphics_shaders_nir_to_asm(device, cache, stages, gfx_state, keep_executable_info, keep_statistic_info,
|
||||
radv_graphics_shaders_nir_to_asm(compiler_info, cache, stages, gfx_state, keep_executable_info, keep_statistic_info,
|
||||
active_nir_stages, debug, binaries, gs_copy_debug, gs_copy_binary);
|
||||
}
|
||||
|
||||
|
|
@ -2835,10 +2828,11 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const Vk
|
|||
const struct radv_graphics_pipeline_state *gfx_state, struct radv_device *device,
|
||||
struct vk_pipeline_cache *cache, bool fast_linking_enabled)
|
||||
{
|
||||
const struct radv_compiler_info *compiler_info = &device->compiler_info;
|
||||
struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL};
|
||||
struct radv_shader_binary *gs_copy_binary = NULL;
|
||||
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.create_flags);
|
||||
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.create_flags);
|
||||
bool keep_executable_info = radv_pipeline_capture_shaders(compiler_info, pipeline->base.create_flags);
|
||||
bool keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, pipeline->base.create_flags);
|
||||
bool skip_shaders_cache = radv_pipeline_skip_shaders_cache(device, &pipeline->base);
|
||||
struct radv_shader_stage *stages = gfx_state->stages;
|
||||
const VkPipelineCreationFeedbackCreateInfo *creation_feedback =
|
||||
|
|
@ -2918,7 +2912,7 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const Vk
|
|||
|
||||
struct radv_shader_debug_info debug[MESA_VULKAN_SHADER_STAGES] = {0};
|
||||
struct radv_shader_debug_info gs_copy_debug = {0};
|
||||
radv_graphics_shaders_compile(device, cache, stages, &gfx_state->key.gfx_state, keep_executable_info,
|
||||
radv_graphics_shaders_compile(compiler_info, cache, stages, &gfx_state->key.gfx_state, keep_executable_info,
|
||||
keep_statistic_info, pipeline->base.is_internal, retained_shaders, noop_fs, debug,
|
||||
binaries, &gs_copy_debug, &gs_copy_binary);
|
||||
radv_graphics_shaders_create(device, cache, skip_shaders_cache, pipeline->base.shaders, binaries, debug,
|
||||
|
|
@ -2932,7 +2926,7 @@ radv_graphics_pipeline_compile(struct radv_graphics_pipeline *pipeline, const Vk
|
|||
for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) {
|
||||
free(binaries[i]);
|
||||
if (stages[i].nir) {
|
||||
if (radv_can_dump_shader_stats(device, stages[i].nir) && pipeline->base.shaders[i]) {
|
||||
if (radv_can_dump_shader_stats(&device->compiler_info, stages[i].nir) && pipeline->base.shaders[i]) {
|
||||
radv_dump_shader_stats(device, &pipeline->base, pipeline->base.shaders[i], stderr);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -651,10 +651,10 @@ struct radv_ps_epilog_state {
|
|||
uint8_t need_src_alpha;
|
||||
};
|
||||
|
||||
struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_device *device,
|
||||
struct radv_ps_epilog_key radv_generate_ps_epilog_key(const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_ps_epilog_state *state);
|
||||
|
||||
void radv_graphics_shaders_compile(struct radv_device *device, struct vk_pipeline_cache *cache,
|
||||
void radv_graphics_shaders_compile(const struct radv_compiler_info *compiler_info, struct vk_pipeline_cache *cache,
|
||||
struct radv_shader_stage *stages, const struct radv_graphics_state_key *gfx_state,
|
||||
bool keep_executable_info, bool keep_statistic_info, bool is_internal,
|
||||
struct radv_retained_shaders *retained_shaders, bool noop_fs,
|
||||
|
|
|
|||
|
|
@ -374,16 +374,13 @@ move_rt_instructions(nir_shader *shader)
|
|||
}
|
||||
|
||||
static void
|
||||
radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline *pipeline,
|
||||
radv_rt_nir_to_asm(const struct radv_compiler_info *compiler_info, struct radv_ray_tracing_pipeline *pipeline,
|
||||
enum radv_rt_lowering_mode mode, struct radv_shader_stage *stage, uint32_t *payload_size,
|
||||
uint32_t *hit_attrib_size, struct radv_ray_tracing_stage_info *stage_info,
|
||||
const struct radv_ray_tracing_stage_info *traversal_stage_info, bool has_position_fetch,
|
||||
struct radv_shader_binary **binary, bool keep_executable_info, bool keep_statistic_info,
|
||||
struct radv_shader_debug_info *debug)
|
||||
{
|
||||
struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
|
||||
switch (mode) {
|
||||
case RADV_RT_LOWERING_MODE_MONOLITHIC:
|
||||
radv_nir_lower_rt_io_monolithic(stage->nir);
|
||||
|
|
@ -399,11 +396,11 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
|
|||
/* Gather shader info. */
|
||||
nir_shader_gather_info(stage->nir, nir_shader_get_entrypoint(stage->nir));
|
||||
radv_nir_shader_info_init(stage->stage, MESA_SHADER_NONE, &stage->info);
|
||||
radv_nir_shader_info_pass(device, stage->nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING, false,
|
||||
&stage->info);
|
||||
radv_nir_shader_info_pass(compiler_info, stage->nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING,
|
||||
false, &stage->info);
|
||||
|
||||
/* Declare shader arguments. */
|
||||
radv_declare_shader_args(device, NULL, &stage->info, stage->stage, MESA_SHADER_NONE, &stage->args, debug);
|
||||
radv_declare_shader_args(compiler_info, NULL, &stage->info, stage->stage, MESA_SHADER_NONE, &stage->args, debug);
|
||||
|
||||
stage->info.user_sgprs_locs = stage->args.user_sgprs_locs;
|
||||
stage->info.inline_push_constant_mask = stage->args.ac.inline_push_const_mask;
|
||||
|
|
@ -427,7 +424,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
|
|||
.stack_alignment = 16,
|
||||
.localized_loads = true,
|
||||
.vectorizer_callback = ac_nir_mem_vectorize_callback,
|
||||
.vectorizer_data = &(struct ac_nir_config){pdev->info.gfx_level, !pdev->use_llvm},
|
||||
.vectorizer_data = &(struct ac_nir_config){compiler_info->ac->gfx_level, !compiler_info->debug.use_llvm},
|
||||
};
|
||||
nir_lower_shader_calls(stage->nir, &opts, &resume_shaders, &num_resume_shaders, mem_ctx);
|
||||
}
|
||||
|
|
@ -449,27 +446,27 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
|
|||
switch (mode) {
|
||||
case RADV_RT_LOWERING_MODE_MONOLITHIC:
|
||||
assert(num_shaders == 1);
|
||||
radv_nir_lower_rt_abi_monolithic(temp_stage.nir, device, pipeline);
|
||||
radv_nir_lower_rt_abi_monolithic(temp_stage.nir, compiler_info, pipeline);
|
||||
break;
|
||||
case RADV_RT_LOWERING_MODE_CPS:
|
||||
radv_nir_lower_rt_abi_cps(temp_stage.nir, &stage->info, i > 0, device, pipeline, has_position_fetch,
|
||||
radv_nir_lower_rt_abi_cps(temp_stage.nir, &stage->info, i > 0, compiler_info, pipeline, has_position_fetch,
|
||||
traversal_stage_info);
|
||||
break;
|
||||
case RADV_RT_LOWERING_MODE_FUNCTION_CALLS:
|
||||
assert(num_shaders == 1);
|
||||
radv_nir_lower_rt_abi_functions(temp_stage.nir, &temp_stage.info, *payload_size, *hit_attrib_size, device,
|
||||
pipeline);
|
||||
radv_nir_lower_rt_abi_functions(temp_stage.nir, &temp_stage.info, *payload_size, *hit_attrib_size,
|
||||
compiler_info, pipeline);
|
||||
break;
|
||||
}
|
||||
|
||||
/* Info might be out-of-date after inlining in radv_nir_lower_rt_abi(). */
|
||||
nir_shader_gather_info(temp_stage.nir, radv_get_rt_shader_entrypoint(temp_stage.nir));
|
||||
|
||||
radv_nir_shader_info_pass(device, temp_stage.nir, &stage->layout, &stage->key, NULL, RADV_PIPELINE_RAY_TRACING,
|
||||
false, &stage->info);
|
||||
radv_nir_shader_info_pass(compiler_info, temp_stage.nir, &stage->layout, &stage->key, NULL,
|
||||
RADV_PIPELINE_RAY_TRACING, false, &stage->info);
|
||||
|
||||
radv_optimize_nir(temp_stage.nir, temp_stage.key.optimisations_disabled);
|
||||
radv_postprocess_nir(device, NULL, &temp_stage);
|
||||
radv_postprocess_nir(compiler_info, NULL, &temp_stage);
|
||||
|
||||
NIR_PASS(_, stage->nir, radv_nir_lower_call_abi, stage->info.wave_size);
|
||||
NIR_PASS(_, stage->nir, nir_lower_global_vars_to_local);
|
||||
|
|
@ -484,11 +481,11 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
|
|||
radv_gather_unused_args(stage_info, temp_stage.nir);
|
||||
}
|
||||
|
||||
debug->dump_shader = radv_can_dump_shader(device, stage->nir);
|
||||
bool dump_nir = debug->dump_shader && (instance->debug_flags & RADV_DEBUG_DUMP_NIR);
|
||||
debug->dump_shader = radv_can_dump_shader(compiler_info, stage->nir);
|
||||
bool dump_nir = debug->dump_shader && compiler_info->debug.dump_nir;
|
||||
|
||||
if (debug->dump_shader) {
|
||||
simple_mtx_lock(&instance->shader_dump_mtx);
|
||||
simple_mtx_lock(compiler_info->debug.shader_dump_mtx);
|
||||
|
||||
if (dump_nir) {
|
||||
for (uint32_t i = 0; i < num_shaders; i++)
|
||||
|
|
@ -497,17 +494,17 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
|
|||
}
|
||||
|
||||
/* Compile NIR shader to AMD assembly. */
|
||||
*binary =
|
||||
radv_shader_nir_to_asm(device, stage, shaders, num_shaders, NULL, keep_executable_info, keep_statistic_info);
|
||||
*binary = radv_shader_nir_to_asm(compiler_info, stage, shaders, num_shaders, NULL, keep_executable_info,
|
||||
keep_statistic_info);
|
||||
|
||||
/* Dump NIR after nir_to_asm, because ACO modifies it. */
|
||||
if (keep_executable_info || debug->dump_shader)
|
||||
debug->nir_string = radv_dump_nir_shaders(instance, shaders, num_shaders);
|
||||
debug->nir_string = radv_dump_nir_shaders(compiler_info, shaders, num_shaders);
|
||||
|
||||
radv_parse_binary_debug_info(device, *binary, debug);
|
||||
radv_parse_binary_debug_info(compiler_info, *binary, debug);
|
||||
debug->stages = 1 << shaders[0]->info.stage;
|
||||
|
||||
radv_shader_dump_asm(device, debug, &stage->info);
|
||||
radv_shader_dump_asm(compiler_info, debug, &stage->info);
|
||||
|
||||
if (keep_executable_info && stage->spirv.size) {
|
||||
debug->spirv = malloc(stage->spirv.size);
|
||||
|
|
@ -516,7 +513,7 @@ radv_rt_nir_to_asm(struct radv_device *device, struct radv_ray_tracing_pipeline
|
|||
}
|
||||
|
||||
if (debug->dump_shader)
|
||||
simple_mtx_unlock(&instance->shader_dump_mtx);
|
||||
simple_mtx_unlock(compiler_info->debug.shader_dump_mtx);
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
}
|
||||
|
|
@ -530,16 +527,18 @@ radv_rt_compile_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
|
|||
struct radv_serialized_shader_arena_block *replay_block, bool skip_shaders_cache,
|
||||
bool has_position_fetch, struct radv_shader **out_shader)
|
||||
{
|
||||
bool keep_executable_info = radv_pipeline_capture_shaders(device, pipeline->base.base.create_flags);
|
||||
bool keep_statistic_info = radv_pipeline_capture_shader_stats(device, pipeline->base.base.create_flags);
|
||||
const struct radv_compiler_info *compiler_info = &device->compiler_info;
|
||||
bool keep_executable_info = radv_pipeline_capture_shaders(compiler_info, pipeline->base.base.create_flags);
|
||||
bool keep_statistic_info = radv_pipeline_capture_shader_stats(compiler_info, pipeline->base.base.create_flags);
|
||||
bool replayable = (pipeline->base.base.create_flags &
|
||||
VK_PIPELINE_CREATE_2_RAY_TRACING_SHADER_GROUP_HANDLE_CAPTURE_REPLAY_BIT_KHR) &&
|
||||
!radv_is_traversal_shader(stage->nir);
|
||||
|
||||
struct radv_shader_binary *binary;
|
||||
struct radv_shader_debug_info debug = {0};
|
||||
radv_rt_nir_to_asm(device, pipeline, mode, stage, payload_size, hit_attrib_size, stage_info, traversal_stage_info,
|
||||
has_position_fetch, &binary, keep_executable_info, keep_statistic_info, &debug);
|
||||
radv_rt_nir_to_asm(compiler_info, pipeline, mode, stage, payload_size, hit_attrib_size, stage_info,
|
||||
traversal_stage_info, has_position_fetch, &binary, keep_executable_info, keep_statistic_info,
|
||||
&debug);
|
||||
|
||||
struct radv_shader *shader;
|
||||
if (replay_block || replayable) {
|
||||
|
|
@ -559,7 +558,7 @@ radv_rt_compile_nir(struct radv_device *device, struct vk_pipeline_cache *cache,
|
|||
|
||||
*out_shader = shader;
|
||||
|
||||
if (radv_can_dump_shader_stats(device, stage->nir))
|
||||
if (radv_can_dump_shader_stats(compiler_info, stage->nir))
|
||||
radv_dump_shader_stats(device, &pipeline->base.base, shader, stderr);
|
||||
|
||||
return shader ? VK_SUCCESS : VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
|
@ -653,10 +652,10 @@ radv_ray_tracing_stage_nir_always_needed(struct radv_ray_tracing_stage *stage)
|
|||
}
|
||||
|
||||
static void
|
||||
radv_rt_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *stage, uint32_t *payload_size,
|
||||
uint32_t *hit_attrib_size, struct radv_ray_tracing_stage_info *info)
|
||||
radv_rt_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage,
|
||||
uint32_t *payload_size, uint32_t *hit_attrib_size, struct radv_ray_tracing_stage_info *info)
|
||||
{
|
||||
stage->nir = radv_shader_spirv_to_nir(device, stage, NULL, false);
|
||||
stage->nir = radv_shader_spirv_to_nir(compiler_info, stage, NULL, false);
|
||||
|
||||
nir_foreach_variable_with_modes (var, stage->nir, nir_var_ray_hit_attrib) {
|
||||
unsigned size, alignment;
|
||||
|
|
@ -729,7 +728,7 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
|
|||
&stage_keys[s], stage);
|
||||
|
||||
/* precompile the shader */
|
||||
radv_rt_spirv_to_nir(device, stage, &payload_size, &hit_attrib_size, &rt_stages[i].info);
|
||||
radv_rt_spirv_to_nir(&device->compiler_info, stage, &payload_size, &hit_attrib_size, &rt_stages[i].info);
|
||||
|
||||
stage->feedback.duration = os_time_get_nano() - stage_start;
|
||||
}
|
||||
|
|
@ -854,12 +853,13 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
|
|||
if (ahit_idx == VK_SHADER_UNUSED_KHR && isec_idx == VK_SHADER_UNUSED_KHR)
|
||||
continue;
|
||||
|
||||
const struct radv_compiler_info *compiler_info = &device->compiler_info;
|
||||
nir_shader *ahit = NULL;
|
||||
nir_shader *isec = NULL;
|
||||
if (ahit_idx != VK_SHADER_UNUSED_KHR)
|
||||
ahit = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[ahit_idx].nir);
|
||||
ahit = radv_pipeline_cache_handle_to_nir(compiler_info, pipeline->stages[ahit_idx].nir);
|
||||
if (isec_idx != VK_SHADER_UNUSED_KHR)
|
||||
isec = radv_pipeline_cache_handle_to_nir(device, pipeline->stages[isec_idx].nir);
|
||||
isec = radv_pipeline_cache_handle_to_nir(compiler_info, pipeline->stages[isec_idx].nir);
|
||||
|
||||
nir_shader *final_shader;
|
||||
if (isec) {
|
||||
|
|
@ -934,8 +934,8 @@ radv_rt_compile_shaders(struct radv_device *device, struct vk_pipeline_cache *ca
|
|||
preprocess = NULL;
|
||||
|
||||
/* create traversal shader */
|
||||
nir_shader *traversal_nir =
|
||||
radv_build_traversal_shader(device, pipeline, &traversal_info, preprocess, payload_size, hit_attrib_size);
|
||||
nir_shader *traversal_nir = radv_build_traversal_shader(&device->compiler_info, pipeline, &traversal_info,
|
||||
preprocess, payload_size, hit_attrib_size);
|
||||
struct radv_shader_stage traversal_stage = {
|
||||
.stage = MESA_SHADER_INTERSECTION,
|
||||
.nir = traversal_nir,
|
||||
|
|
@ -1049,10 +1049,10 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
|
|||
|
||||
struct radv_shader_stage prolog_stage = {0};
|
||||
struct radv_shader_debug_info debug = {0};
|
||||
radv_build_rt_prolog(device, &prolog_stage, uses_descriptor_heap, &debug);
|
||||
radv_build_rt_prolog(&device->compiler_info, &prolog_stage, uses_descriptor_heap, &debug);
|
||||
prolog_stage.nir->options = &pdev->nir_options[MESA_SHADER_COMPUTE];
|
||||
radv_optimize_nir(prolog_stage.nir, false);
|
||||
radv_postprocess_nir(device, NULL, &prolog_stage);
|
||||
radv_postprocess_nir(&device->compiler_info, NULL, &prolog_stage);
|
||||
|
||||
NIR_PASS(_, prolog_stage.nir, radv_nir_lower_call_abi, prolog_stage.info.wave_size);
|
||||
NIR_PASS(_, prolog_stage.nir, nir_lower_global_vars_to_local);
|
||||
|
|
|
|||
|
|
@ -132,10 +132,10 @@ radv_tex_filter_mode(VkSamplerReductionMode mode)
|
|||
}
|
||||
|
||||
static uint32_t
|
||||
radv_get_max_anisotropy(const struct radv_device *device, const struct vk_sampler_state *sampler_state)
|
||||
radv_get_max_anisotropy(const struct radv_compiler_info *compiler_info, const struct vk_sampler_state *sampler_state)
|
||||
{
|
||||
if (device->force_aniso >= 0)
|
||||
return device->force_aniso;
|
||||
if (compiler_info->force_aniso >= 0)
|
||||
return compiler_info->force_aniso;
|
||||
|
||||
if (sampler_state->anisotropy_enable && sampler_state->max_anisotropy > 1.0f)
|
||||
return (uint32_t)sampler_state->max_anisotropy;
|
||||
|
|
@ -199,19 +199,17 @@ radv_unregister_border_color(struct radv_device *device, uint32_t index)
|
|||
}
|
||||
|
||||
void
|
||||
radv_make_sampler_descriptor(const struct radv_device *device, const struct vk_sampler_state *sampler_state,
|
||||
uint32_t *desc)
|
||||
radv_make_sampler_descriptor(const struct radv_compiler_info *compiler_info,
|
||||
const struct vk_sampler_state *sampler_state, uint32_t *desc)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const struct radv_instance *instance = radv_physical_device_instance(pdev);
|
||||
const uint32_t max_aniso = radv_get_max_anisotropy(device, sampler_state);
|
||||
const uint32_t max_aniso = radv_get_max_anisotropy(compiler_info, sampler_state);
|
||||
const uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
|
||||
const unsigned filter_mode = radv_tex_filter_mode(sampler_state->reduction_mode);
|
||||
unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
|
||||
const bool trunc_coord =
|
||||
((sampler_state->min_filter == VK_FILTER_NEAREST && sampler_state->mag_filter == VK_FILTER_NEAREST) ||
|
||||
pdev->info.compiler_info.conformant_trunc_coord) &&
|
||||
!instance->drirc.debug.disable_trunc_coord;
|
||||
compiler_info->ac->conformant_trunc_coord) &&
|
||||
!compiler_info->cache_key->disable_trunc_coord;
|
||||
const VkBorderColor border_color = radv_get_border_color(sampler_state);
|
||||
const bool disable_cube_wrap = sampler_state->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;
|
||||
|
||||
|
|
@ -238,12 +236,12 @@ radv_make_sampler_descriptor(const struct radv_device *device, const struct vk_s
|
|||
.min_lod = sampler_state->min_lod,
|
||||
.max_lod = sampler_state->max_lod,
|
||||
.lod_bias = sampler_state->mip_lod_bias,
|
||||
.aniso_single_level = !instance->drirc.debug.disable_aniso_single_level,
|
||||
.aniso_single_level = !compiler_info->cache_key->disable_aniso_single_level,
|
||||
.border_color_type = radv_tex_bordercolor(border_color),
|
||||
.border_color_ptr = border_color_ptr,
|
||||
};
|
||||
|
||||
ac_build_sampler_descriptor(pdev->info.gfx_level, &ac_state, desc);
|
||||
ac_build_sampler_descriptor(compiler_info->ac->gfx_level, &ac_state, desc);
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
|
@ -285,7 +283,7 @@ radv_sampler_init(struct radv_device *device, struct radv_sampler *sampler, cons
|
|||
}
|
||||
}
|
||||
|
||||
radv_make_sampler_descriptor(device, &sampler_state, sampler->state);
|
||||
radv_make_sampler_descriptor(&device->compiler_info, &sampler_state, sampler->state);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
#include "vk_sampler.h"
|
||||
|
||||
struct radv_device;
|
||||
struct radv_compiler_info;
|
||||
|
||||
struct radv_sampler {
|
||||
struct vk_sampler vk;
|
||||
|
|
@ -28,7 +29,7 @@ VkResult radv_sampler_init(struct radv_device *device, struct radv_sampler *samp
|
|||
const VkSamplerCreateInfo *pCreateInfo);
|
||||
void radv_sampler_finish(struct radv_device *device, struct radv_sampler *sampler);
|
||||
|
||||
void radv_make_sampler_descriptor(const struct radv_device *device, const struct vk_sampler_state *sampler_state,
|
||||
uint32_t *desc);
|
||||
void radv_make_sampler_descriptor(const struct radv_compiler_info *compiler_info,
|
||||
const struct vk_sampler_state *sampler_state, uint32_t *desc);
|
||||
|
||||
#endif /* RADV_SAMPLER_H */
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -607,7 +607,7 @@ void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets, bool opt_
|
|||
|
||||
struct radv_shader_stage;
|
||||
|
||||
nir_shader *radv_shader_spirv_to_nir(struct radv_device *device, struct radv_shader_stage *stage,
|
||||
nir_shader *radv_shader_spirv_to_nir(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stage,
|
||||
const struct radv_spirv_to_nir_options *options, bool is_internal);
|
||||
|
||||
void radv_init_shader_arenas(struct radv_device *device);
|
||||
|
|
@ -617,23 +617,22 @@ void radv_destroy_shader_upload_queue(struct radv_device *device);
|
|||
|
||||
struct radv_shader_args;
|
||||
|
||||
VkResult radv_parse_binary_debug_info(struct radv_device *device, const struct radv_shader_binary *binary,
|
||||
VkResult radv_parse_binary_debug_info(const struct radv_compiler_info *compiler_info, const struct radv_shader_binary *binary,
|
||||
struct radv_shader_debug_info *dbg);
|
||||
|
||||
VkResult radv_shader_create_uncached(struct radv_device *device, const struct radv_shader_binary *binary,
|
||||
bool replayable, struct radv_serialized_shader_arena_block *replay_block,
|
||||
struct radv_shader_debug_info *dbg, struct radv_shader **out_shader);
|
||||
|
||||
struct radv_shader_binary *radv_shader_nir_to_asm(struct radv_device *device, struct radv_shader_stage *pl_stage,
|
||||
struct nir_shader *const *shaders, int shader_count,
|
||||
const struct radv_graphics_state_key *gfx_state,
|
||||
struct radv_shader_binary *radv_shader_nir_to_asm(const struct radv_compiler_info *compiler_info,
|
||||
struct radv_shader_stage *pl_stage, struct nir_shader *const *shaders,
|
||||
int shader_count, const struct radv_graphics_state_key *gfx_state,
|
||||
bool keep_shader_info, bool keep_statistic_info);
|
||||
|
||||
void radv_shader_dump_asm(struct radv_device *device, const struct radv_shader_debug_info *debug,
|
||||
void radv_shader_dump_asm(const struct radv_compiler_info *compiler_info, const struct radv_shader_debug_info *debug,
|
||||
const struct radv_shader_info *info);
|
||||
|
||||
struct radv_instance;
|
||||
char *radv_dump_nir_shaders(const struct radv_instance *instance, struct nir_shader *const *shaders, int shader_count);
|
||||
char *radv_dump_nir_shaders(const struct radv_compiler_info *compiler_info, struct nir_shader *const *shaders, int shader_count);
|
||||
|
||||
VkResult radv_shader_wait_for_upload(struct radv_device *device, uint64_t seq);
|
||||
|
||||
|
|
@ -692,9 +691,9 @@ unsigned radv_compute_spi_ps_input(enum amd_gfx_level gfx_level, const struct ra
|
|||
|
||||
bool radv_is_traversal_shader(nir_shader *nir);
|
||||
|
||||
bool radv_can_dump_shader(struct radv_device *device, nir_shader *nir);
|
||||
bool radv_can_dump_shader(const struct radv_compiler_info *compiler_info, nir_shader *nir);
|
||||
|
||||
bool radv_can_dump_shader_stats(struct radv_device *device, nir_shader *nir);
|
||||
bool radv_can_dump_shader_stats(const struct radv_compiler_info *compiler_info, nir_shader *nir);
|
||||
|
||||
VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, struct radv_shader *shader,
|
||||
FILE *output);
|
||||
|
|
@ -751,15 +750,16 @@ get_tcs_input_vertex_stride(unsigned tcs_num_inputs)
|
|||
return stride;
|
||||
}
|
||||
|
||||
void radv_get_tess_wg_info(const struct radv_physical_device *pdev, const ac_nir_tess_io_info *io_info,
|
||||
void radv_get_tess_wg_info(const struct radv_compiler_info *compiler_info, const ac_nir_tess_io_info *io_info,
|
||||
unsigned tcs_vertices_out, unsigned tcs_num_input_vertices, unsigned tcs_num_lds_inputs,
|
||||
unsigned *num_patches_per_wg, unsigned *lds_size);
|
||||
|
||||
void radv_lower_ngg(struct radv_device *device, struct radv_shader_stage *ngg_stage,
|
||||
void radv_lower_ngg(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *ngg_stage,
|
||||
const struct radv_graphics_state_key *gfx_state);
|
||||
|
||||
bool radv_consider_culling(const struct radv_physical_device *pdev, struct nir_shader *nir, uint64_t ps_inputs_read,
|
||||
unsigned num_vertices_per_primitive, const struct radv_shader_info *info);
|
||||
bool radv_consider_culling(const struct radv_compiler_info *compiler_info, struct nir_shader *nir,
|
||||
uint64_t ps_inputs_read, unsigned num_vertices_per_primitive,
|
||||
const struct radv_shader_info *info);
|
||||
|
||||
void radv_get_nir_options(struct radv_physical_device *pdev);
|
||||
|
||||
|
|
|
|||
|
|
@ -10,8 +10,6 @@
|
|||
*/
|
||||
|
||||
#include "radv_shader_args.h"
|
||||
#include "radv_device.h"
|
||||
#include "radv_physical_device.h"
|
||||
#include "radv_shader.h"
|
||||
|
||||
#include "util/memstream.h"
|
||||
|
|
@ -322,11 +320,9 @@ declare_ms_input_sgprs(struct radv_shader_args_state *state, const struct radv_s
|
|||
}
|
||||
|
||||
static void
|
||||
declare_ms_input_vgprs(struct radv_shader_args_state *state, const struct radv_device *device)
|
||||
declare_ms_input_vgprs(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (pdev->info.mesh_fast_launch_2) {
|
||||
if (compiler_info->hw.mesh_fast_launch_2) {
|
||||
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_ids_packed);
|
||||
} else {
|
||||
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.vertex_id);
|
||||
|
|
@ -378,13 +374,13 @@ declare_ngg_sgprs(struct radv_shader_args_state *state, const struct radv_shader
|
|||
}
|
||||
|
||||
static void
|
||||
radv_init_shader_args(struct radv_shader_args_state *state, const struct radv_device *device, mesa_shader_stage stage)
|
||||
radv_init_shader_args(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state,
|
||||
mesa_shader_stage stage)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
memset(state->args, 0, sizeof(*state->args));
|
||||
|
||||
state->args->explicit_scratch_args = !pdev->use_llvm;
|
||||
state->args->remap_spi_ps_input = !pdev->use_llvm;
|
||||
state->args->explicit_scratch_args = !compiler_info->debug.use_llvm;
|
||||
state->args->remap_spi_ps_input = !compiler_info->debug.use_llvm;
|
||||
|
||||
for (int i = 0; i < MAX_SETS; i++)
|
||||
state->args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
|
||||
|
|
@ -557,12 +553,11 @@ declare_unmerged_vs_tes_gs_args(struct radv_shader_args_state *state, const enum
|
|||
}
|
||||
|
||||
static void
|
||||
declare_shader_args(struct radv_shader_args_state *state, const struct radv_device *device,
|
||||
declare_shader_args(const struct radv_compiler_info *compiler_info, struct radv_shader_args_state *state,
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radv_shader_info *info,
|
||||
mesa_shader_stage stage, mesa_shader_stage previous_stage, struct user_sgpr_info *user_sgpr_info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
const enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
|
||||
bool has_shader_query = info->has_prim_query || info->has_xfb_query ||
|
||||
(stage == MESA_SHADER_GEOMETRY && info->gs.has_pipeline_stat_query) ||
|
||||
(stage == MESA_SHADER_MESH && info->ms.has_query) ||
|
||||
|
|
@ -590,7 +585,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
|
|||
}
|
||||
}
|
||||
|
||||
radv_init_shader_args(state, device, stage);
|
||||
radv_init_shader_args(compiler_info, state, stage);
|
||||
|
||||
if (mesa_shader_stage_is_rt(stage)) {
|
||||
return;
|
||||
|
|
@ -617,7 +612,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
|
|||
declare_global_input_sgprs(state, gfx_level, info, user_sgpr_info);
|
||||
|
||||
if (info->cs.uses_grid_size) {
|
||||
if (device->load_grid_size_from_user_sgpr)
|
||||
if (compiler_info->load_grid_size_from_user_sgpr)
|
||||
RADV_ADD_UD_ARG(state, 3, AC_ARG_VALUE, ac.num_work_groups, AC_UD_CS_GRID_SIZE);
|
||||
else
|
||||
RADV_ADD_UD_ARG(state, 2, AC_ARG_CONST_ADDR, ac.num_work_groups, AC_UD_CS_GRID_SIZE);
|
||||
|
|
@ -660,7 +655,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
|
|||
RADV_ADD_ARG(state, AC_ARG_SGPR, 1, AC_ARG_VALUE, ac.scratch_offset);
|
||||
}
|
||||
|
||||
if (pdev->info.compiler_info.local_invocation_ids_packed) {
|
||||
if (compiler_info->ac->local_invocation_ids_packed) {
|
||||
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_ids_packed);
|
||||
} else {
|
||||
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.local_invocation_id_x);
|
||||
|
|
@ -830,11 +825,11 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
|
|||
|
||||
declare_ngg_sgprs(state, info, ngg_needs_state_sgpr);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX11 && has_shader_query)
|
||||
if (gfx_level >= GFX11 && has_shader_query)
|
||||
RADV_ADD_UD_ARG(state, 1, AC_ARG_VALUE, ngg_query_buf_va, AC_UD_NGG_QUERY_BUF_VA);
|
||||
}
|
||||
|
||||
if (previous_stage != MESA_SHADER_MESH || !pdev->info.mesh_fast_launch_2) {
|
||||
if (previous_stage != MESA_SHADER_MESH || !compiler_info->hw.mesh_fast_launch_2) {
|
||||
if (gfx_level >= GFX12) {
|
||||
RADV_ADD_ARRAY_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_vtx_offset, 0);
|
||||
RADV_ADD_ARG(state, AC_ARG_VGPR, 1, AC_ARG_VALUE, ac.gs_prim_id);
|
||||
|
|
@ -854,7 +849,7 @@ declare_shader_args(struct radv_shader_args_state *state, const struct radv_devi
|
|||
} else if (previous_stage == MESA_SHADER_TESS_EVAL) {
|
||||
declare_tes_input_vgprs(state);
|
||||
} else if (previous_stage == MESA_SHADER_MESH) {
|
||||
declare_ms_input_vgprs(state, device);
|
||||
declare_ms_input_vgprs(compiler_info, state);
|
||||
}
|
||||
} else {
|
||||
declare_global_input_sgprs(state, gfx_level, info, user_sgpr_info);
|
||||
|
|
@ -964,9 +959,10 @@ radv_gather_shader_args_debug_info(struct radv_shader_args_state *state, struct
|
|||
}
|
||||
|
||||
void
|
||||
radv_declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
|
||||
const struct radv_shader_info *info, mesa_shader_stage stage, mesa_shader_stage previous_stage,
|
||||
struct radv_shader_args *args, struct radv_shader_debug_info *debug)
|
||||
radv_declare_shader_args(const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radv_shader_info *info,
|
||||
mesa_shader_stage stage, mesa_shader_stage previous_stage, struct radv_shader_args *args,
|
||||
struct radv_shader_debug_info *debug)
|
||||
{
|
||||
struct radv_shader_args_state state = {
|
||||
.args = args,
|
||||
|
|
@ -975,7 +971,7 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
|
|||
struct user_sgpr_info user_sgpr_info = {0};
|
||||
|
||||
if (!mesa_shader_stage_is_rt(stage)) {
|
||||
declare_shader_args(&state, device, gfx_state, info, stage, previous_stage, NULL);
|
||||
declare_shader_args(compiler_info, &state, gfx_state, info, stage, previous_stage, NULL);
|
||||
|
||||
uint32_t num_user_sgprs = args->num_user_sgprs;
|
||||
if (info->loads_push_constants)
|
||||
|
|
@ -986,8 +982,7 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
|
|||
num_user_sgprs++;
|
||||
}
|
||||
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
const enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
|
||||
uint32_t available_sgprs =
|
||||
gfx_level >= GFX9 && stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_TASK ? 32 : 16;
|
||||
uint32_t remaining_sgprs = available_sgprs - num_user_sgprs;
|
||||
|
|
@ -1012,13 +1007,13 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
|
|||
allocate_inline_push_consts(info, &user_sgpr_info);
|
||||
}
|
||||
|
||||
state.gather_debug_info = debug && device->keep_shader_info;
|
||||
state.gather_debug_info = debug && compiler_info->debug.keep_shader_info;
|
||||
if (state.gather_debug_info) {
|
||||
state.ctx = ralloc_context(NULL);
|
||||
state.gather_debug_info &= !!state.ctx;
|
||||
}
|
||||
|
||||
declare_shader_args(&state, device, gfx_state, info, stage, previous_stage, &user_sgpr_info);
|
||||
declare_shader_args(compiler_info, &state, gfx_state, info, stage, previous_stage, &user_sgpr_info);
|
||||
|
||||
if (state.gather_debug_info)
|
||||
radv_gather_shader_args_debug_info(&state, debug);
|
||||
|
|
@ -1027,14 +1022,14 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_gra
|
|||
}
|
||||
|
||||
void
|
||||
radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
|
||||
radv_declare_ps_epilog_args(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_key *key,
|
||||
struct radv_shader_args *args)
|
||||
{
|
||||
struct radv_shader_args_state state = {
|
||||
.args = args,
|
||||
};
|
||||
|
||||
radv_init_shader_args(&state, device, MESA_SHADER_FRAGMENT);
|
||||
radv_init_shader_args(compiler_info, &state, MESA_SHADER_FRAGMENT);
|
||||
|
||||
/* Declare VGPR arguments for depth/stencil/sample exports. */
|
||||
if (key->export_depth)
|
||||
|
|
|
|||
|
|
@ -135,15 +135,15 @@ radv_shader_args_from_ac(struct ac_shader_args *args)
|
|||
struct radv_graphics_state_key;
|
||||
struct radv_shader_info;
|
||||
struct radv_ps_epilog_key;
|
||||
struct radv_device;
|
||||
struct radv_shader_debug_info;
|
||||
struct radv_compiler_info;
|
||||
|
||||
void radv_declare_shader_args(const struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
|
||||
const struct radv_shader_info *info, mesa_shader_stage stage,
|
||||
mesa_shader_stage previous_stage, struct radv_shader_args *args,
|
||||
void radv_declare_shader_args(const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radv_shader_info *info,
|
||||
mesa_shader_stage stage, mesa_shader_stage previous_stage, struct radv_shader_args *args,
|
||||
struct radv_shader_debug_info *debug);
|
||||
|
||||
void radv_declare_ps_epilog_args(const struct radv_device *device, const struct radv_ps_epilog_key *key,
|
||||
void radv_declare_ps_epilog_args(const struct radv_compiler_info *compiler_info, const struct radv_ps_epilog_key *key,
|
||||
struct radv_shader_args *args);
|
||||
|
||||
void radv_declare_rt_shader_args(enum amd_gfx_level gfx_level, struct radv_shader_args *args);
|
||||
|
|
|
|||
|
|
@ -481,13 +481,12 @@ radv_compute_esgs_itemsize(enum amd_gfx_level gfx_level, uint32_t num_varyings)
|
|||
}
|
||||
|
||||
static void
|
||||
gather_shader_info_ngg_query(struct radv_device *device, struct radv_shader_info *info)
|
||||
gather_shader_info_ngg_query(const struct radv_compiler_info *compiler_info, struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
info->gs.has_pipeline_stat_query = pdev->emulate_ngg_gs_query_pipeline_stat && info->stage == MESA_SHADER_GEOMETRY;
|
||||
info->gs.has_pipeline_stat_query =
|
||||
compiler_info->emulate_ngg_gs_query_pipeline_stat && info->stage == MESA_SHADER_GEOMETRY;
|
||||
info->has_xfb_query = !!info->so.enabled_stream_buffers_mask;
|
||||
info->has_prim_query = device->cache_key.primitives_generated_query || info->has_xfb_query;
|
||||
info->has_prim_query = compiler_info->primitives_generated_query || info->has_xfb_query;
|
||||
}
|
||||
|
||||
uint64_t
|
||||
|
|
@ -525,12 +524,10 @@ radv_gather_unlinked_patch_io_mask(const uint64_t nir_io_mask, const uint32_t ni
|
|||
}
|
||||
|
||||
static void
|
||||
gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
|
||||
gather_shader_info_vs(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
|
||||
const struct radv_graphics_state_key *gfx_state, const struct radv_shader_stage_key *stage_key,
|
||||
struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
if (radv_use_vs_prolog(nir, gfx_state)) {
|
||||
info->vs.has_prolog = true;
|
||||
info->vs.dynamic_inputs = true;
|
||||
|
|
@ -567,19 +564,18 @@ gather_shader_info_vs(struct radv_device *device, const nir_shader *nir,
|
|||
info->vs.as_ls = true;
|
||||
} else if (info->next_stage == MESA_SHADER_GEOMETRY) {
|
||||
info->vs.as_es = true;
|
||||
info->esgs_itemsize = radv_compute_esgs_itemsize(pdev->info.gfx_level, info->vs.num_linked_outputs);
|
||||
info->esgs_itemsize = radv_compute_esgs_itemsize(compiler_info->ac->gfx_level, info->vs.num_linked_outputs);
|
||||
}
|
||||
|
||||
if (info->is_ngg && (info->next_stage == MESA_SHADER_FRAGMENT || info->next_stage == MESA_SHADER_NONE)) {
|
||||
gather_shader_info_ngg_query(device, info);
|
||||
gather_shader_info_ngg_query(compiler_info, info);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
|
||||
gather_shader_info_tcs(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
|
||||
const struct radv_graphics_state_key *gfx_state, struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
ac_nir_map_io_driver_location map_output = info->outputs_linked ? NULL : radv_map_io_driver_location;
|
||||
|
||||
nir_tcs_info tcs_info;
|
||||
|
|
@ -597,7 +593,7 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
|
|||
info->tcs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
|
||||
|
||||
if (gfx_state->ts.patch_control_points) {
|
||||
radv_get_tess_wg_info(pdev, &info->tcs.io_info, nir->info.tess.tcs_vertices_out,
|
||||
radv_get_tess_wg_info(compiler_info, &info->tcs.io_info, nir->info.tess.tcs_vertices_out,
|
||||
gfx_state->ts.patch_control_points,
|
||||
/* TODO: This should be only inputs in LDS (not VGPR inputs) to reduce LDS usage */
|
||||
info->tcs.num_linked_inputs, &info->num_tess_patches, &info->tcs.lds_size);
|
||||
|
|
@ -605,10 +601,9 @@ gather_shader_info_tcs(struct radv_device *device, const nir_shader *nir,
|
|||
}
|
||||
|
||||
static void
|
||||
gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
|
||||
gather_shader_info_tes(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
|
||||
struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
info->gs_inputs_read = ~0ULL;
|
||||
info->tes._primitive_mode = nir->info.tess._primitive_mode;
|
||||
info->tes.spacing = nir->info.tess.spacing;
|
||||
|
|
@ -629,11 +624,11 @@ gather_shader_info_tes(struct radv_device *device, const nir_shader *nir, struct
|
|||
|
||||
if (info->next_stage == MESA_SHADER_GEOMETRY) {
|
||||
info->tes.as_es = true;
|
||||
info->esgs_itemsize = radv_compute_esgs_itemsize(pdev->info.gfx_level, info->tes.num_linked_outputs);
|
||||
info->esgs_itemsize = radv_compute_esgs_itemsize(compiler_info->ac->gfx_level, info->tes.num_linked_outputs);
|
||||
}
|
||||
|
||||
if (info->is_ngg && (info->next_stage == MESA_SHADER_FRAGMENT || info->next_stage == MESA_SHADER_NONE)) {
|
||||
gather_shader_info_ngg_query(device, info);
|
||||
gather_shader_info_ngg_query(compiler_info, info);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -678,10 +673,9 @@ radv_get_esgs_gsvs_ring_size(const struct radv_device *device, struct radv_shade
|
|||
}
|
||||
|
||||
void
|
||||
radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info,
|
||||
radv_get_legacy_gs_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
|
||||
struct radv_shader_info *gs_info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_legacy_gs_info *out = &gs_info->legacy_gs_info;
|
||||
const unsigned esgs_vertex_stride = es_info ? es_info->esgs_itemsize : out->esgs_itemsize;
|
||||
ac_legacy_gs_subgroup_info info;
|
||||
|
|
@ -689,7 +683,7 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
|
|||
ac_legacy_gs_compute_subgroup_info(gs_info->gs.input_prim, gs_info->gs.vertices_out, gs_info->gs.invocations,
|
||||
esgs_vertex_stride, &info);
|
||||
|
||||
const uint32_t lds_granularity = ac_shader_get_lds_alloc_granularity(pdev->info.gfx_level);
|
||||
const uint32_t lds_granularity = ac_shader_get_lds_alloc_granularity(compiler_info->ac->gfx_level);
|
||||
const uint32_t total_lds_bytes = align(info.esgs_lds_size * 4, lds_granularity);
|
||||
|
||||
out->gs_inst_prims_in_subgroup = info.gs_inst_prims_in_subgroup;
|
||||
|
|
@ -699,10 +693,9 @@ radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_inf
|
|||
}
|
||||
|
||||
static void
|
||||
gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
|
||||
gather_shader_info_gs(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
|
||||
struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
|
||||
info->gs.vertices_in = nir->info.gs.vertices_in;
|
||||
info->gs.vertices_out = nir->info.gs.vertices_out;
|
||||
info->gs.input_prim = nir->info.gs.input_primitive;
|
||||
|
|
@ -713,13 +706,14 @@ gather_shader_info_gs(struct radv_device *device, const nir_shader *nir, struct
|
|||
info->gs.num_linked_inputs = util_last_bit64(radv_gather_unlinked_io_mask(nir->info.inputs_read));
|
||||
|
||||
if (info->is_ngg)
|
||||
gather_shader_info_ngg_query(device, info);
|
||||
gather_shader_info_ngg_query(compiler_info, info);
|
||||
else
|
||||
info->legacy_gs_info.esgs_itemsize = radv_compute_esgs_itemsize(pdev->info.gfx_level, info->gs.num_linked_inputs);
|
||||
info->legacy_gs_info.esgs_itemsize =
|
||||
radv_compute_esgs_itemsize(compiler_info->ac->gfx_level, info->gs.num_linked_inputs);
|
||||
}
|
||||
|
||||
static void
|
||||
gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir,
|
||||
gather_shader_info_mesh(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
|
||||
const struct radv_shader_stage_key *stage_key, struct radv_shader_info *info)
|
||||
{
|
||||
struct gfx10_ngg_info *ngg_info = &info->ngg_info;
|
||||
|
|
@ -760,17 +754,17 @@ gather_shader_info_mesh(struct radv_device *device, const nir_shader *nir,
|
|||
ngg_info->prim_amp_factor = nir->info.mesh.max_primitives_out;
|
||||
ngg_info->vgt_esgs_ring_itemsize = 1;
|
||||
|
||||
info->ms.has_query = device->cache_key.mesh_shader_queries;
|
||||
info->ms.has_query = compiler_info->mesh_shader_queries;
|
||||
info->ms.has_task = stage_key->has_task_shader;
|
||||
}
|
||||
|
||||
static void
|
||||
calc_mesh_workgroup_size(const struct radv_device *device, const nir_shader *nir, struct radv_shader_info *info)
|
||||
calc_mesh_workgroup_size(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
|
||||
struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
unsigned api_workgroup_size = ac_compute_cs_workgroup_size(nir->info.workgroup_size, false, UINT32_MAX);
|
||||
|
||||
if (pdev->info.mesh_fast_launch_2) {
|
||||
if (compiler_info->hw.mesh_fast_launch_2) {
|
||||
/* Use multi-row export. It is also necessary to use the API workgroup size for non-emulated queries. */
|
||||
info->workgroup_size = api_workgroup_size;
|
||||
} else {
|
||||
|
|
@ -884,7 +878,7 @@ gather_shader_info_rt(const nir_shader *nir, struct radv_shader_info *info)
|
|||
}
|
||||
|
||||
static void
|
||||
gather_shader_info_task(struct radv_device *device, const nir_shader *nir,
|
||||
gather_shader_info_task(const struct radv_compiler_info *compiler_info, const nir_shader *nir,
|
||||
const struct radv_shader_stage_key *stage_key, struct radv_shader_info *info)
|
||||
{
|
||||
/* Task shaders always need these for the I/O lowering even if the API shader doesn't actually
|
||||
|
|
@ -906,7 +900,7 @@ gather_shader_info_task(struct radv_device *device, const nir_shader *nir,
|
|||
info->cs.linear_taskmesh_dispatch =
|
||||
nir->info.mesh.ts_mesh_dispatch_dimensions[1] == 1 && nir->info.mesh.ts_mesh_dispatch_dimensions[2] == 1;
|
||||
|
||||
info->cs.has_query = device->cache_key.mesh_shader_queries;
|
||||
info->cs.has_query = compiler_info->mesh_shader_queries;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
|
|
@ -1000,12 +994,11 @@ radv_nir_shader_info_init(mesa_shader_stage stage, mesa_shader_stage next_stage,
|
|||
}
|
||||
|
||||
void
|
||||
radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
|
||||
radv_nir_shader_info_pass(const struct radv_compiler_info *compiler_info, const struct nir_shader *nir,
|
||||
const struct radv_shader_layout *layout, const struct radv_shader_stage_key *stage_key,
|
||||
const struct radv_graphics_state_key *gfx_state, const enum radv_pipeline_type pipeline_type,
|
||||
bool consider_force_vrs, struct radv_shader_info *info)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct nir_function *func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
|
||||
|
||||
if (layout->use_dynamic_descriptors) {
|
||||
|
|
@ -1060,11 +1053,11 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
|
||||
/* Used by compute and mesh shaders. Mesh shaders must always declare this before GFX11. */
|
||||
info->cs.uses_grid_size = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_WORKGROUPS) ||
|
||||
(nir->info.stage == MESA_SHADER_MESH && pdev->info.gfx_level < GFX11);
|
||||
(nir->info.stage == MESA_SHADER_MESH && compiler_info->ac->gfx_level < GFX11);
|
||||
info->cs.uses_local_invocation_idx = BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) |
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_SUBGROUP_ID) |
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_NUM_SUBGROUPS) |
|
||||
radv_shader_should_clear_lds(device, nir);
|
||||
radv_shader_should_clear_lds(compiler_info, nir);
|
||||
info->cs.derivative_group = nir->info.derivative_group;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE || nir->info.stage == MESA_SHADER_TASK ||
|
||||
|
|
@ -1073,8 +1066,9 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
info->cs.block_size[i] = nir->info.workgroup_size[i];
|
||||
}
|
||||
|
||||
info->user_data_0 = radv_get_user_data_0(pdev->info.gfx_level, info);
|
||||
info->merged_shader_compiled_separately = radv_is_merged_shader_compiled_separately(pdev->info.gfx_level, info);
|
||||
info->user_data_0 = radv_get_user_data_0(compiler_info->ac->gfx_level, info);
|
||||
info->merged_shader_compiled_separately =
|
||||
radv_is_merged_shader_compiled_separately(compiler_info->ac->gfx_level, info);
|
||||
info->force_indirect_descriptors = info->merged_shader_compiled_separately || stage_key->indirect_bindable;
|
||||
info->descriptor_heap = stage_key->descriptor_heap;
|
||||
|
||||
|
|
@ -1082,25 +1076,25 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
case MESA_SHADER_COMPUTE:
|
||||
break;
|
||||
case MESA_SHADER_TASK:
|
||||
gather_shader_info_task(device, nir, stage_key, info);
|
||||
gather_shader_info_task(compiler_info, nir, stage_key, info);
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
gather_shader_info_fs(pdev->info.gfx_level, nir, gfx_state, info);
|
||||
gather_shader_info_fs(compiler_info->ac->gfx_level, nir, gfx_state, info);
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
gather_shader_info_gs(device, nir, info);
|
||||
gather_shader_info_gs(compiler_info, nir, info);
|
||||
break;
|
||||
case MESA_SHADER_TESS_EVAL:
|
||||
gather_shader_info_tes(device, nir, info);
|
||||
gather_shader_info_tes(compiler_info, nir, info);
|
||||
break;
|
||||
case MESA_SHADER_TESS_CTRL:
|
||||
gather_shader_info_tcs(device, nir, gfx_state, info);
|
||||
gather_shader_info_tcs(compiler_info, nir, gfx_state, info);
|
||||
break;
|
||||
case MESA_SHADER_VERTEX:
|
||||
gather_shader_info_vs(device, nir, gfx_state, stage_key, info);
|
||||
gather_shader_info_vs(compiler_info, nir, gfx_state, stage_key, info);
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
gather_shader_info_mesh(device, nir, stage_key, info);
|
||||
gather_shader_info_mesh(compiler_info, nir, stage_key, info);
|
||||
break;
|
||||
default:
|
||||
if (mesa_shader_stage_is_rt(nir->info.stage))
|
||||
|
|
@ -1111,7 +1105,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
info->wave_size = nir->info.min_subgroup_size;
|
||||
assert(info->wave_size == nir->info.max_subgroup_size);
|
||||
assert(info->wave_size == 32 || info->wave_size == 64);
|
||||
assert(pdev->info.gfx_level >= GFX10 || info->wave_size == 64);
|
||||
assert(compiler_info->ac->gfx_level >= GFX10 || info->wave_size == 64);
|
||||
assert(nir->info.stage != MESA_SHADER_GEOMETRY || info->is_ngg || info->wave_size == 64);
|
||||
|
||||
switch (nir->info.stage) {
|
||||
|
|
@ -1140,7 +1134,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
case MESA_SHADER_TESS_CTRL:
|
||||
if (gfx_state->ts.patch_control_points) {
|
||||
info->workgroup_size =
|
||||
ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
|
||||
ac_compute_lshs_workgroup_size(compiler_info->ac->gfx_level, MESA_SHADER_TESS_CTRL, info->num_tess_patches,
|
||||
gfx_state->ts.patch_control_points, info->tcs.tcs_vertices_out);
|
||||
} else {
|
||||
/* Set the maximum possible value when the workgroup size can't be determined. */
|
||||
|
|
@ -1168,7 +1162,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
* Set the maximum possible value by default, this will be optimized during linking if
|
||||
* possible.
|
||||
*/
|
||||
if (pdev->info.gfx_level <= GFX8)
|
||||
if (compiler_info->ac->gfx_level <= GFX8)
|
||||
info->workgroup_size = info->wave_size;
|
||||
else
|
||||
info->workgroup_size = 256;
|
||||
|
|
@ -1181,7 +1175,7 @@ radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *n
|
|||
}
|
||||
break;
|
||||
case MESA_SHADER_MESH:
|
||||
calc_mesh_workgroup_size(device, nir, info);
|
||||
calc_mesh_workgroup_size(compiler_info, nir, info);
|
||||
break;
|
||||
default:
|
||||
/* FS always operates without workgroups. Other stages are computed during linking but assume
|
||||
|
|
@ -1255,11 +1249,10 @@ gfx10_get_ngg_vert_prim_lds_size(const struct radv_shader_info *es_info, const s
|
|||
}
|
||||
|
||||
void
|
||||
gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
|
||||
struct gfx10_ngg_info *out)
|
||||
gfx10_get_ngg_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
|
||||
struct radv_shader_info *gs_info, struct gfx10_ngg_info *out)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
const enum amd_gfx_level gfx_level = pdev->info.gfx_level;
|
||||
const enum amd_gfx_level gfx_level = compiler_info->ac->gfx_level;
|
||||
const struct radv_shader_info *stage_info = gs_info ? gs_info : es_info;
|
||||
const unsigned gs_num_invocations = gs_info ? MAX2(gs_info->gs.invocations, 1) : 1;
|
||||
const unsigned input_prim = radv_get_pre_rast_input_topology(es_info, gs_info);
|
||||
|
|
@ -1301,10 +1294,9 @@ gfx10_ngg_set_esgs_ring_itemsize(struct radv_shader_info *es_info, struct radv_s
|
|||
}
|
||||
|
||||
static void
|
||||
radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage *ngg_stage,
|
||||
radv_determine_ngg_settings(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *ngg_stage,
|
||||
struct radv_shader_stage *fs_stage, const struct radv_graphics_state_key *gfx_state)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
uint64_t ps_inputs_read;
|
||||
|
||||
assert(ngg_stage->info.is_ngg);
|
||||
|
|
@ -1337,11 +1329,12 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage
|
|||
}
|
||||
|
||||
ngg_stage->info.has_ngg_culling =
|
||||
radv_consider_culling(pdev, ngg_stage->nir, ps_inputs_read, num_vertices_per_prim, &ngg_stage->info);
|
||||
radv_consider_culling(compiler_info, ngg_stage->nir, ps_inputs_read, num_vertices_per_prim, &ngg_stage->info);
|
||||
|
||||
if (ngg_stage->stage != MESA_SHADER_GEOMETRY) {
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(ngg_stage->nir);
|
||||
ngg_stage->info.has_ngg_early_prim_export = pdev->info.gfx_level < GFX11 && exec_list_is_singular(&impl->body);
|
||||
ngg_stage->info.has_ngg_early_prim_export =
|
||||
compiler_info->ac->gfx_level < GFX11 && exec_list_is_singular(&impl->body);
|
||||
|
||||
/* NGG passthrough mode should be disabled when culling and when the vertex shader
|
||||
* exports the primitive ID.
|
||||
|
|
@ -1353,10 +1346,9 @@ radv_determine_ngg_settings(struct radv_device *device, struct radv_shader_stage
|
|||
}
|
||||
|
||||
static void
|
||||
radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *stages,
|
||||
radv_link_shaders_info(const struct radv_compiler_info *compiler_info, struct radv_shader_stage *stages,
|
||||
const struct radv_graphics_state_key *gfx_state)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
struct radv_shader_stage *vs_stage = stages[MESA_SHADER_VERTEX].nir ? &stages[MESA_SHADER_VERTEX] : NULL;
|
||||
struct radv_shader_stage *tcs_stage = stages[MESA_SHADER_TESS_CTRL].nir ? &stages[MESA_SHADER_TESS_CTRL] : NULL;
|
||||
struct radv_shader_stage *tes_stage = stages[MESA_SHADER_TESS_EVAL].nir ? &stages[MESA_SHADER_TESS_EVAL] : NULL;
|
||||
|
|
@ -1382,15 +1374,15 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *sta
|
|||
const bool ps_prim_id_in = !fs_stage || fs_stage->info.ps.prim_id_input;
|
||||
const bool ps_clip_dists_in = !fs_stage || !!fs_stage->info.ps.input_clips_culls_mask;
|
||||
|
||||
radv_set_vs_output_param(pdev->info.gfx_level, prerast_stage->nir, gfx_state, &prerast_stage->info, ps_prim_id_in,
|
||||
ps_clip_dists_in);
|
||||
radv_set_vs_output_param(compiler_info->ac->gfx_level, prerast_stage->nir, gfx_state, &prerast_stage->info,
|
||||
ps_prim_id_in, ps_clip_dists_in);
|
||||
}
|
||||
|
||||
if (prerast_stage && !ms_stage) {
|
||||
/* Compute NGG info (GFX10+) or GS info. */
|
||||
if (ngg_stage) {
|
||||
/* Determine other NGG settings like culling. */
|
||||
radv_determine_ngg_settings(device, ngg_stage, fs_stage, gfx_state);
|
||||
radv_determine_ngg_settings(compiler_info, ngg_stage, fs_stage, gfx_state);
|
||||
|
||||
if (es_stage) {
|
||||
gfx10_ngg_set_esgs_ring_itemsize(&es_stage->info, gs_stage ? &gs_stage->info : NULL,
|
||||
|
|
@ -1412,11 +1404,11 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *sta
|
|||
vs_stage->info.vs.tcs_inputs_via_lds = tcs_stage->nir->info.inputs_read;
|
||||
|
||||
if (gfx_state->ts.patch_control_points) {
|
||||
vs_stage->info.workgroup_size =
|
||||
ac_compute_lshs_workgroup_size(pdev->info.gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
|
||||
gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
|
||||
vs_stage->info.workgroup_size = ac_compute_lshs_workgroup_size(
|
||||
compiler_info->ac->gfx_level, MESA_SHADER_VERTEX, tcs_stage->info.num_tess_patches,
|
||||
gfx_state->ts.patch_control_points, tcs_stage->info.tcs.tcs_vertices_out);
|
||||
|
||||
if (!pdev->use_llvm) {
|
||||
if (!compiler_info->debug.use_llvm) {
|
||||
/* When the number of TCS input and output vertices are the same (typically 3):
|
||||
* - There is an equal amount of LS and HS invocations
|
||||
* - In case of merged LSHS shaders, the LS and HS halves of the shader always process
|
||||
|
|
@ -1427,7 +1419,7 @@ radv_link_shaders_info(struct radv_device *device, struct radv_shader_stage *sta
|
|||
* instruction dominating another with a different mode.
|
||||
*/
|
||||
vs_stage->info.vs.tcs_in_out_eq =
|
||||
pdev->info.gfx_level >= GFX9 &&
|
||||
compiler_info->ac->gfx_level >= GFX9 &&
|
||||
gfx_state->ts.patch_control_points == tcs_stage->info.tcs.tcs_vertices_out &&
|
||||
vs_stage->nir->info.float_controls_execution_mode == tcs_stage->nir->info.float_controls_execution_mode;
|
||||
|
||||
|
|
@ -1493,14 +1485,12 @@ radv_nir_shader_info_merge(const struct radv_shader_stage *src, struct radv_shad
|
|||
}
|
||||
|
||||
void
|
||||
radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
|
||||
struct radv_shader_stage *stages)
|
||||
radv_nir_shader_info_link(const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages)
|
||||
{
|
||||
const struct radv_physical_device *pdev = radv_device_physical(device);
|
||||
radv_link_shaders_info(compiler_info, stages, gfx_state);
|
||||
|
||||
radv_link_shaders_info(device, stages, gfx_state);
|
||||
|
||||
if (pdev->info.gfx_level >= GFX9) {
|
||||
if (compiler_info->ac->gfx_level >= GFX9) {
|
||||
/* Merge shader info for VS+TCS. */
|
||||
if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_TESS_CTRL].nir) {
|
||||
radv_nir_shader_info_merge(&stages[MESA_SHADER_VERTEX], &stages[MESA_SHADER_TESS_CTRL]);
|
||||
|
|
|
|||
|
|
@ -327,7 +327,7 @@ struct radv_shader_regs {
|
|||
|
||||
void radv_nir_shader_info_init(mesa_shader_stage stage, mesa_shader_stage next_stage, struct radv_shader_info *info);
|
||||
|
||||
void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shader *nir,
|
||||
void radv_nir_shader_info_pass(const struct radv_compiler_info *compiler_info, const struct nir_shader *nir,
|
||||
const struct radv_shader_layout *layout, const struct radv_shader_stage_key *stage_key,
|
||||
const struct radv_graphics_state_key *gfx_state,
|
||||
const enum radv_pipeline_type pipeline_type, bool consider_force_vrs,
|
||||
|
|
@ -335,16 +335,17 @@ void radv_nir_shader_info_pass(struct radv_device *device, const struct nir_shad
|
|||
|
||||
void radv_get_esgs_gsvs_ring_size(const struct radv_device *device, struct radv_shader_regs *regs,
|
||||
const struct radv_shader_info *es_info, const struct radv_shader_info *gs_info);
|
||||
void radv_get_legacy_gs_info(const struct radv_device *device, struct radv_shader_info *es_info, struct radv_shader_info *gs_info);
|
||||
void radv_get_legacy_gs_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
|
||||
struct radv_shader_info *gs_info);
|
||||
|
||||
void gfx10_get_ngg_info(const struct radv_device *device, struct radv_shader_info *es_info,
|
||||
void gfx10_get_ngg_info(const struct radv_compiler_info *compiler_info, struct radv_shader_info *es_info,
|
||||
struct radv_shader_info *gs_info, struct gfx10_ngg_info *out);
|
||||
|
||||
void gfx10_ngg_set_esgs_ring_itemsize(struct radv_shader_info *es_info, struct radv_shader_info *gs_info,
|
||||
struct gfx10_ngg_info *out);
|
||||
|
||||
void radv_nir_shader_info_link(struct radv_device *device, const struct radv_graphics_state_key *gfx_state,
|
||||
struct radv_shader_stage *stages);
|
||||
void radv_nir_shader_info_link(const struct radv_compiler_info *compiler_info,
|
||||
const struct radv_graphics_state_key *gfx_state, struct radv_shader_stage *stages);
|
||||
|
||||
enum ac_hw_stage radv_select_hw_stage(const struct radv_shader_info *const info, const enum amd_gfx_level gfx_level);
|
||||
|
||||
|
|
|
|||
|
|
@ -170,7 +170,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r
|
|||
struct radv_shader_debug_info debug[MESA_VULKAN_SHADER_STAGES] = {0};
|
||||
struct radv_shader_debug_info gs_copy_debug = {0};
|
||||
|
||||
radv_graphics_shaders_compile(device, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
|
||||
radv_graphics_shaders_compile(&device->compiler_info, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
|
||||
&gs_copy_debug, &shader_obj->gs.copy_binary);
|
||||
radv_graphics_shaders_create(device, NULL, true, shaders, binaries, debug, &shader_obj->gs.copy_shader,
|
||||
shader_obj->gs.copy_binary, &gs_copy_debug);
|
||||
|
|
@ -202,7 +202,7 @@ radv_shader_object_init_graphics(struct radv_shader_object *shader_obj, struct r
|
|||
radv_shader_stage_init(pCreateInfo, &stages[stage]);
|
||||
stages[stage].next_stage = next_stage;
|
||||
|
||||
radv_graphics_shaders_compile(device, NULL, stages, &gfx_state, false, false, false, NULL, false, debug,
|
||||
radv_graphics_shaders_compile(&device->compiler_info, NULL, stages, &gfx_state, false, false, false, NULL, false, debug,
|
||||
binaries, &gs_copy_debug, &shader_obj->gs.copy_binary);
|
||||
radv_graphics_shaders_create(device, NULL, true, shaders, binaries, debug, &shader_obj->gs.copy_shader,
|
||||
shader_obj->gs.copy_binary, &gs_copy_debug);
|
||||
|
|
@ -251,7 +251,7 @@ radv_shader_object_init_compute(struct radv_shader_object *shader_obj, struct ra
|
|||
radv_shader_stage_init(pCreateInfo, &stage);
|
||||
|
||||
struct radv_shader_debug_info cs_dbg = {0};
|
||||
struct radv_shader_binary *cs_binary = radv_compile_cs(device, &stage, false, false, false, &cs_dbg);
|
||||
struct radv_shader_binary *cs_binary = radv_compile_cs(&device->compiler_info, &stage, false, false, false, &cs_dbg);
|
||||
struct radv_shader *cs_shader = radv_shader_create(device, NULL, cs_binary, true, &cs_dbg);
|
||||
|
||||
ralloc_free(stage.nir);
|
||||
|
|
@ -474,7 +474,7 @@ radv_shader_object_create_linked(VkDevice _device, uint32_t createInfoCount, con
|
|||
struct radv_shader_binary *gs_copy_binary = NULL;
|
||||
struct radv_shader_debug_info gs_copy_debug = {0};
|
||||
|
||||
radv_graphics_shaders_compile(device, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
|
||||
radv_graphics_shaders_compile(&device->compiler_info, NULL, stages, &gfx_state, false, false, false, NULL, false, debug, binaries,
|
||||
&gs_copy_debug, &gs_copy_binary);
|
||||
radv_graphics_shaders_create(device, NULL, true, shaders, binaries, debug, &gs_copy_shader, gs_copy_binary,
|
||||
&gs_copy_debug);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue