mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-06 00:49:04 +02:00
ac: lower load_num_workgroups in NIR
Reviewed-by: Timur Kristóf <timur.kristof@gmail.com> Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/39638>
This commit is contained in:
parent
1e11e83d1c
commit
d1e6a5c1c8
17 changed files with 12 additions and 45 deletions
|
|
@ -96,6 +96,7 @@ typedef struct {
|
|||
unsigned wave_size;
|
||||
unsigned workgroup_size;
|
||||
bool use_llvm;
|
||||
bool load_grid_size_from_user_sgpr;
|
||||
} ac_nir_lower_intrinsics_to_args_options;
|
||||
|
||||
bool ac_nir_lower_intrinsics_to_args(nir_shader *shader, const struct ac_shader_args *ac_args,
|
||||
|
|
|
|||
|
|
@ -184,6 +184,14 @@ lower_intrinsic_to_arg(nir_builder *b, nir_intrinsic_instr *intrin, void *state)
|
|||
UNREACHABLE("unexpected shader stage");
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
if (s->options->load_grid_size_from_user_sgpr) {
|
||||
replacement = ac_nir_load_arg(b, s->args, s->args->num_work_groups);
|
||||
} else {
|
||||
nir_def *addr = nir_pack_64_2x32(b, ac_nir_load_arg(b, s->args, s->args->num_work_groups));
|
||||
replacement = ac_nir_load_smem(b, 3, addr, nir_imm_int(b, 0), 4, ACCESS_CAN_SPECULATE);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_pixel_coord:
|
||||
replacement = nir_unpack_32_2x16(b, ac_nir_load_arg(b, s->args, s->args->pos_fixed_pt));
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -148,7 +148,6 @@ struct aco_compiler_options {
|
|||
bool record_ir;
|
||||
bool record_stats;
|
||||
bool has_ls_vgpr_init_bug;
|
||||
bool load_grid_size_from_user_sgpr;
|
||||
bool optimisations_disabled;
|
||||
uint8_t enable_mrt_output_nan_fixup;
|
||||
bool wgp_mode;
|
||||
|
|
|
|||
|
|
@ -553,7 +553,6 @@ init_context(isel_context* ctx, nir_shader* shader)
|
|||
RegType type = RegType::sgpr;
|
||||
switch (intrinsic->intrinsic) {
|
||||
case nir_intrinsic_load_push_constant:
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
case nir_intrinsic_load_num_subgroups:
|
||||
case nir_intrinsic_vote_all:
|
||||
case nir_intrinsic_vote_any:
|
||||
|
|
|
|||
|
|
@ -4012,20 +4012,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
case nir_intrinsic_load_scratch: visit_load_scratch(ctx, instr); break;
|
||||
case nir_intrinsic_store_scratch: visit_store_scratch(ctx, instr); break;
|
||||
case nir_intrinsic_barrier: emit_barrier(ctx, instr); break;
|
||||
case nir_intrinsic_load_num_workgroups: {
|
||||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
if (ctx->options->load_grid_size_from_user_sgpr) {
|
||||
bld.copy(Definition(dst), get_arg(ctx, ctx->args->num_work_groups));
|
||||
} else {
|
||||
Temp addr = get_arg(ctx, ctx->args->num_work_groups);
|
||||
assert(addr.regClass() == s2);
|
||||
bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
|
||||
bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), addr, Operand::zero()),
|
||||
bld.smem(aco_opcode::s_load_dword, bld.def(s1), addr, Operand::c32(8)));
|
||||
}
|
||||
emit_split_vector(ctx, dst, 3);
|
||||
break;
|
||||
}
|
||||
case nir_intrinsic_ddx:
|
||||
case nir_intrinsic_ddy:
|
||||
case nir_intrinsic_ddx_fine:
|
||||
|
|
|
|||
|
|
@ -838,12 +838,6 @@ static LLVMValueRef ac_build_load_custom(struct ac_llvm_context *ctx, LLVMTypeRe
|
|||
return result;
|
||||
}
|
||||
|
||||
LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, struct ac_llvm_pointer ptr,
|
||||
LLVMValueRef index)
|
||||
{
|
||||
return ac_build_load_custom(ctx, ptr.t, ptr.v, index, false, true, false);
|
||||
}
|
||||
|
||||
/* This assumes that there is no unsigned integer wraparound during the address
|
||||
* computation, excluding all GEPs within base_ptr. */
|
||||
LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, struct ac_llvm_pointer ptr,
|
||||
|
|
|
|||
|
|
@ -222,8 +222,6 @@ LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, struct ac_llvm_pointer p
|
|||
void ac_build_indexed_store(struct ac_llvm_context *ctx, struct ac_llvm_pointer ptr, LLVMValueRef index,
|
||||
LLVMValueRef value);
|
||||
|
||||
LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, struct ac_llvm_pointer ptr,
|
||||
LLVMValueRef index);
|
||||
LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, struct ac_llvm_pointer ptr,
|
||||
LLVMValueRef index);
|
||||
|
||||
|
|
|
|||
|
|
@ -2669,17 +2669,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
|
|||
case nir_intrinsic_is_helper_invocation:
|
||||
result = ac_build_load_helper_invocation(&ctx->ac);
|
||||
break;
|
||||
case nir_intrinsic_load_num_workgroups:
|
||||
if (ctx->abi->load_grid_size_from_user_sgpr) {
|
||||
result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
|
||||
} else {
|
||||
struct ac_llvm_pointer ptr;
|
||||
ptr.pointee_type = ctx->ac.v3i32;
|
||||
ptr.value = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
|
||||
|
||||
result = ac_build_load_invariant(&ctx->ac, ptr, ctx->ac.i32_0);
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_subgroup_id:
|
||||
assert(mesa_shader_stage_is_compute(ctx->stage) && ctx->ac.gfx_level >= GFX12);
|
||||
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.wave.id", ctx->ac.i32, NULL, 0, 0);
|
||||
|
|
|
|||
|
|
@ -63,9 +63,6 @@ struct ac_shader_abi {
|
|||
/* Clamp div by 0 (so it won't produce NaN) */
|
||||
bool clamp_div_by_zero;
|
||||
|
||||
/* Whether to inline the compute dispatch size in user sgprs. */
|
||||
bool load_grid_size_from_user_sgpr;
|
||||
|
||||
/* Whether to disable anisotropic filtering. */
|
||||
bool disable_aniso_single_level;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -119,7 +119,6 @@ radv_aco_convert_opts(struct aco_compiler_options *aco_info, const struct radv_n
|
|||
ASSIGN_FIELD(debug.private_data);
|
||||
aco_info->cu_info = &radv->info->cu_info;
|
||||
aco_info->is_opengl = false;
|
||||
aco_info->load_grid_size_from_user_sgpr = radv_args->load_grid_size_from_user_sgpr;
|
||||
aco_info->optimisations_disabled = stage_key->optimisations_disabled;
|
||||
aco_info->gfx_level = radv->info->gfx_level;
|
||||
aco_info->family = radv->info->family;
|
||||
|
|
|
|||
|
|
@ -240,7 +240,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, const struct radv_nir
|
|||
ctx.abi.load_sampler_desc = radv_get_sampler_desc;
|
||||
ctx.abi.clamp_shadow_reference = false;
|
||||
ctx.abi.robust_buffer_access = options->robust_buffer_access_llvm;
|
||||
ctx.abi.load_grid_size_from_user_sgpr = args->load_grid_size_from_user_sgpr;
|
||||
|
||||
bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg;
|
||||
if (shader_count >= 2 || is_ngg)
|
||||
|
|
|
|||
|
|
@ -497,6 +497,7 @@ radv_postprocess_nir(struct radv_device *device, const struct radv_graphics_stat
|
|||
.wave_size = stage->info.wave_size,
|
||||
.workgroup_size = stage->info.workgroup_size,
|
||||
.use_llvm = radv_use_llvm_for_stage(pdev, stage->stage),
|
||||
.load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr,
|
||||
});
|
||||
NIR_PASS(_, stage->nir, radv_nir_lower_abi, gfx_level, stage, gfx_state, pdev->info.address32_hi);
|
||||
|
||||
|
|
|
|||
|
|
@ -312,7 +312,6 @@ radv_init_shader_args(const struct radv_device *device, mesa_shader_stage stage,
|
|||
|
||||
args->explicit_scratch_args = !radv_use_llvm_for_stage(pdev, stage);
|
||||
args->remap_spi_ps_input = !radv_use_llvm_for_stage(pdev, stage);
|
||||
args->load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr;
|
||||
|
||||
for (int i = 0; i < MAX_SETS; i++)
|
||||
args->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
|
||||
|
|
@ -543,7 +542,7 @@ declare_shader_args(const struct radv_device *device, const struct radv_graphics
|
|||
declare_global_input_sgprs(gfx_level, info, user_sgpr_info, args);
|
||||
|
||||
if (info->cs.uses_grid_size) {
|
||||
if (args->load_grid_size_from_user_sgpr)
|
||||
if (device->load_grid_size_from_user_sgpr)
|
||||
add_ud_arg(args, 3, AC_ARG_VALUE, &args->ac.num_work_groups, AC_UD_CS_GRID_SIZE);
|
||||
else
|
||||
add_ud_arg(args, 2, AC_ARG_CONST_ADDR, &args->ac.num_work_groups, AC_UD_CS_GRID_SIZE);
|
||||
|
|
|
|||
|
|
@ -121,7 +121,6 @@ struct radv_shader_args {
|
|||
|
||||
bool explicit_scratch_args;
|
||||
bool remap_spi_ps_input;
|
||||
bool load_grid_size_from_user_sgpr;
|
||||
};
|
||||
|
||||
static inline struct radv_shader_args *
|
||||
|
|
|
|||
|
|
@ -1144,6 +1144,7 @@ static void si_postprocess_nir(struct si_nir_shader_ctx *ctx)
|
|||
.wave_size = shader->wave_size,
|
||||
.workgroup_size = si_get_max_workgroup_size(shader),
|
||||
.use_llvm = !nir->info.use_aco_amd,
|
||||
.load_grid_size_from_user_sgpr = true,
|
||||
});
|
||||
|
||||
/* LLVM keep non-uniform sampler as index, so can't do this in NIR.
|
||||
|
|
|
|||
|
|
@ -53,7 +53,6 @@ si_fill_aco_options(struct si_screen *screen, mesa_shader_stage stage,
|
|||
options->is_opengl = true;
|
||||
|
||||
options->has_ls_vgpr_init_bug = screen->info.has_ls_vgpr_init_bug;
|
||||
options->load_grid_size_from_user_sgpr = true;
|
||||
options->family = screen->info.family;
|
||||
options->gfx_level = screen->info.gfx_level;
|
||||
options->address32_hi = screen->info.address32_hi;
|
||||
|
|
|
|||
|
|
@ -544,7 +544,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
|
|||
|
||||
ctx->abi.clamp_shadow_reference = true;
|
||||
ctx->abi.robust_buffer_access = true;
|
||||
ctx->abi.load_grid_size_from_user_sgpr = true;
|
||||
ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero ||
|
||||
info->options & SI_PROFILE_CLAMP_DIV_BY_ZERO;
|
||||
ctx->abi.disable_aniso_single_level = true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue