radeonsi: add si_nir_lower_resource pass

Replace the load_ubo abi.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18666>
This commit is contained in:
Qiang Yu 2022-08-16 18:29:03 +08:00
parent f0f5d5c5e5
commit 5bc5cae571
5 changed files with 150 additions and 55 deletions

View file

@ -46,6 +46,7 @@ files_libradeonsi = files(
'si_query.c',
'si_query.h',
'si_nir_lower_abi.c',
'si_nir_lower_resource.c',
'si_nir_optim.c',
'si_sdma_copy_image.c',
'si_shader.c',

View file

@ -0,0 +1,143 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* on the rights to use, copy, modify, merge, publish, distribute, sub
* license, and/or sell copies of the Software, and to permit persons to whom
* the Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/*
* This lowering pass converts index-based buffer/image/texture access to
* explicit descriptor-based access, which simplifies the compiler backend
* translation.
*
* For example: load_ubo(1) -> load_ubo(vec4), where the vec4 is the buffer
* descriptor with index==1, so the compiler backend doesn't need to do the
* index-to-descriptor lookup, which is the most complicated part (it is
* done in NIR now).
*/
#include "nir_builder.h"
#include "ac_nir.h"
#include "si_pipe.h"
#include "si_shader_internal.h"
#include "sid.h"
/* State handed to every instruction callback of the resource lowering pass. */
struct lower_resource_state {
/* Shader being lowered; its selector supplies the UBO/SSBO counts and
 * the screen info consulted when building descriptors. */
struct si_shader *shader;
/* Shader argument layout; used to load the const_and_shader_buffers
 * argument that descriptors are derived from. */
struct si_shader_args *args;
};
/*
 * Assemble the vec4 buffer descriptor for the fast path out of immediates
 * instead of loading it from memory.
 *
 * b:       builder positioned where the descriptor should be emitted
 * addr_lo: value placed in component 0 of the descriptor
 * sel:     shader selector providing screen info and constbuf0_num_slots
 *
 * Returns the vec4 (addr_lo, address-hi word, constbuf0_num_slots * 16,
 * rsrc3 word).
 */
static nir_ssa_def *load_ubo_desc_fast_path(nir_builder *b, nir_ssa_def *addr_lo,
                                            struct si_shader_selector *sel)
{
   /* Component 1: upper address bits taken from the screen info. */
   nir_ssa_def *addr_hi =
      nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(sel->screen->info.address32_hi));

   /* Component 3 starts with an identity X/Y/Z/W destination select... */
   uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X);
   rsrc3 |= S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y);
   rsrc3 |= S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z);
   rsrc3 |= S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

   /* ...and gets the format fields that match the hardware generation. */
   if (sel->screen->info.gfx_level < GFX10) {
      rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
               S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
   } else if (sel->screen->info.gfx_level < GFX11) {
      rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
               S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
   } else {
      rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
               S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
   }

   /* Component 2: 16 per constant-buffer-0 slot (presumably the size in bytes). */
   nir_ssa_def *num_records = nir_imm_int(b, sel->info.constbuf0_num_slots * 16);
   nir_ssa_def *word3 = nir_imm_int(b, rsrc3);

   return nir_vec4(b, addr_lo, addr_hi, num_records, word3);
}
/*
 * Emit code that keeps a runtime index below max.
 *
 * When max passes util_is_power_of_two_or_zero() the index is masked with
 * max - 1 (for max == 0 the unsigned subtraction wraps, so the mask keeps
 * all bits). Otherwise the index is replaced by max - 1 whenever it is
 * larger than that, using an unsigned comparison.
 *
 * Returns the bounded index value.
 */
static nir_ssa_def *clamp_index(nir_builder *b, nir_ssa_def *index, unsigned max)
{
   const unsigned last = max - 1;

   if (!util_is_power_of_two_or_zero(max)) {
      nir_ssa_def *limit = nir_imm_int(b, last);
      nir_ssa_def *in_range = nir_uge(b, limit, index);

      /* in range -> keep the index, otherwise use the last valid one */
      return nir_bcsel(b, in_range, index, limit);
   }

   return nir_iand_imm(b, index, last);
}
/*
 * Produce the descriptor for the UBO selected by index.
 *
 * b:     builder positioned where the code should be emitted
 * index: UBO index taken from the load_ubo intrinsic
 * s:     pass state (shader + argument layout)
 *
 * With exactly one UBO and no SSBOs the descriptor is built from
 * immediates by load_ubo_desc_fast_path(). Otherwise the index is bounded
 * by num_ubos, biased by SI_NUM_SHADER_BUFFERS, scaled by 16 (shift by 4)
 * and used as the offset of a 4-component nir_load_smem_amd from the
 * const_and_shader_buffers argument.
 */
static nir_ssa_def *load_ubo_desc(nir_builder *b, nir_ssa_def *index,
                                  struct lower_resource_state *s)
{
   struct si_shader_selector *sel = s->shader->selector;

   /* The argument is loaded up front; both paths below consume it. */
   nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->const_and_shader_buffers);

   const bool single_ubo_only =
      sel->info.base.num_ubos == 1 && sel->info.base.num_ssbos == 0;
   if (single_ubo_only)
      return load_ubo_desc_fast_path(b, list, sel);

   nir_ssa_def *slot = clamp_index(b, index, sel->info.base.num_ubos);
   slot = nir_iadd_imm(b, slot, SI_NUM_SHADER_BUFFERS);

   /* slot << 4: each list entry is addressed in steps of 16. */
   nir_ssa_def *byte_offset = nir_ishl_imm(b, slot, 4);

   return nir_load_smem_amd(b, 4, list, byte_offset);
}
/*
 * Lower one intrinsic if it is a resource access handled by this pass.
 *
 * Currently only load_ubo is handled: its first source (the buffer index)
 * is replaced by the descriptor produced by load_ubo_desc(). The access
 * flags are asserted not to contain ACCESS_NON_UNIFORM.
 *
 * Returns true when the intrinsic was rewritten, false otherwise.
 */
static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                                     struct lower_resource_state *s)
{
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_ubo: {
      assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

      nir_ssa_def *ubo_desc = load_ubo_desc(b, intrin->src[0].ssa, s);
      nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[0], ubo_desc);
      return true;
   }
   default:
      /* Not a resource intrinsic this pass knows about. */
      return false;
   }
}
/*
 * Per-instruction callback for nir_shader_instructions_pass().
 *
 * Places the builder cursor before the instruction, then forwards
 * intrinsic instructions to lower_resource_intrinsic(). Every other
 * instruction type is left alone.
 *
 * state is the struct lower_resource_state supplied by
 * si_nir_lower_resource(). Returns true when the instruction was changed.
 */
static bool lower_resource_instr(nir_builder *b, nir_instr *instr, void *state)
{
   struct lower_resource_state *s = state;

   /* The cursor is set unconditionally, before the type check. */
   b->cursor = nir_before_instr(instr);

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   return lower_resource_intrinsic(b, nir_instr_as_intrinsic(instr), s);
}
/*
 * Entry point of the resource lowering pass.
 *
 * nir:    shader whose instructions are visited
 * shader: radeonsi shader the NIR belongs to
 * args:   shader argument layout used to locate descriptor arguments
 *
 * Runs lower_resource_instr() over every instruction through
 * nir_shader_instructions_pass(), passing the dominance and block-index
 * metadata flags, and returns that function's result.
 */
bool si_nir_lower_resource(nir_shader *nir, struct si_shader *shader,
                           struct si_shader_args *args)
{
   struct lower_resource_state pass_state;

   pass_state.shader = shader;
   pass_state.args = args;

   return nir_shader_instructions_pass(nir, lower_resource_instr,
                                       nir_metadata_dominance | nir_metadata_block_index,
                                       &pass_state);
}

View file

@ -1886,6 +1886,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_
if (sel->stage == MESA_SHADER_FRAGMENT && key->ps.mono.point_smoothing)
NIR_PASS(progress, nir, nir_lower_point_smooth);
NIR_PASS(progress, nir, si_nir_lower_resource, shader, args);
bool is_last_vgt_stage =
(sel->stage == MESA_SHADER_VERTEX ||
sel->stage == MESA_SHADER_TESS_EVAL ||

View file

@ -189,6 +189,10 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
/* si_nir_lower_abi.c */
bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args);
/* si_nir_lower_resource.c */
bool si_nir_lower_resource(nir_shader *nir, struct si_shader *shader,
struct si_shader_args *args);
/* si_shader_llvm.c */
bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
struct ac_shader_config *conf, struct ac_llvm_compiler *compiler,

View file

@ -53,60 +53,6 @@ static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, LLVMValue
return index;
}
/* LLVM-side fast path: build a 4-element constant buffer descriptor from
 * the const_and_shader_buffers argument and constants, rather than
 * loading it. Returns the gathered 4-element value.
 */
static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
{
LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->args->const_and_shader_buffers);
struct si_shader_selector *sel = ctx->shader->selector;
/* Do the bounds checking with a descriptor, because
* doing computation and manual bounds checking of 64-bit
* addresses generates horrible VALU code with very high
* VGPR usage and very low SIMD occupancy.
*/
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
LLVMValueRef desc0, desc1;
/* Element 0 is the argument converted to an integer; element 1 carries
 * the upper address bits from the screen info. */
desc0 = ptr;
desc1 = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
/* Element 3: identity X/Y/Z/W destination select plus the format fields
 * appropriate for the hardware generation. */
uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
if (ctx->screen->info.gfx_level >= GFX11)
rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
else if (ctx->screen->info.gfx_level >= GFX10)
rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
else
rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
/* Element 2 is constbuf0_num_slots * 16 (presumably the buffer size in
 * bytes used for the bounds check mentioned above). */
LLVMValueRef desc_elems[] = {desc0, desc1,
LLVMConstInt(ctx->ac.i32, sel->info.constbuf0_num_slots * 16, 0),
LLVMConstInt(ctx->ac.i32, rsrc3, false)};
return ac_build_gather_values(&ctx->ac, desc_elems, 4);
}
/* ABI callback returning the descriptor for the UBO selected by index.
 * Uses the constant-built fast path when the shader has exactly one UBO
 * and no SSBOs; otherwise loads the descriptor from the
 * const_and_shader_buffers list.
 */
static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
struct si_shader_selector *sel = ctx->shader->selector;
if (sel->info.base.num_ubos == 1 && sel->info.base.num_ssbos == 0) {
return load_const_buffer_desc_fast_path(ctx);
}
/* Bound the index by the number of constant buffers, then bias it by
 * SI_NUM_SHADER_BUFFERS before indexing the list. */
index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
index =
LLVMBuildAdd(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, SI_NUM_SHADER_BUFFERS, 0), "");
return ac_build_load_to_sgpr(&ctx->ac,
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->const_and_shader_buffers),
index);
}
static LLVMValueRef load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write, bool non_uniform)
{
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
@ -335,7 +281,6 @@ static LLVMValueRef si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned
/* Install the resource-descriptor callbacks (UBO, SSBO, sampler) on the
 * shader context's ABI structure. */
void si_llvm_init_resource_callbacks(struct si_shader_context *ctx)
{
ctx->abi.load_ubo = load_ubo;
ctx->abi.load_ssbo = load_ssbo;
ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
}