mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-04 20:38:06 +02:00
radeonsi: add si_nir_lower_resource pass
Replace the load_ubo abi. Reviewed-by: Marek Olšák <marek.olsak@amd.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/18666>
This commit is contained in:
parent
f0f5d5c5e5
commit
5bc5cae571
5 changed files with 150 additions and 55 deletions
|
|
@ -46,6 +46,7 @@ files_libradeonsi = files(
|
|||
'si_query.c',
|
||||
'si_query.h',
|
||||
'si_nir_lower_abi.c',
|
||||
'si_nir_lower_resource.c',
|
||||
'si_nir_optim.c',
|
||||
'si_sdma_copy_image.c',
|
||||
'si_shader.c',
|
||||
|
|
|
|||
143
src/gallium/drivers/radeonsi/si_nir_lower_resource.c
Normal file
143
src/gallium/drivers/radeonsi/si_nir_lower_resource.c
Normal file
|
|
@ -0,0 +1,143 @@
|
|||
/*
|
||||
* Copyright 2022 Advanced Micro Devices, Inc.
|
||||
* All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* on the rights to use, copy, modify, merge, publish, distribute, sub
|
||||
* license, and/or sell copies of the Software, and to permit persons to whom
|
||||
* the Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
|
||||
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
|
||||
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||
* USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This lowering pass converts index based buffer/image/texture access to
|
||||
* explicit descriptor based access, which simplifies the compiler backend translation.
|
||||
*
|
||||
* For example: load_ubo(1) -> load_ubo(vec4), where the vec4 is the buffer
|
||||
* descriptor with index==1, so the compiler backend doesn't need to do index-to-descriptor
|
||||
* finding, which is the most complicated part (moved to NIR now).
|
||||
*/
|
||||
|
||||
#include "nir_builder.h"
|
||||
|
||||
#include "ac_nir.h"
|
||||
#include "si_pipe.h"
|
||||
#include "si_shader_internal.h"
|
||||
#include "sid.h"
|
||||
|
||||
/* Per-invocation context handed to the callbacks of the resource lowering pass. */
struct lower_resource_state {
   /* Shader being lowered; its selector provides the UBO/SSBO counts. */
   struct si_shader *shader;

   /* Shader arguments; the const_and_shader_buffers argument is loaded from here. */
   struct si_shader_args *args;
};
|
||||
|
||||
/*
 * Assemble a 4-dword buffer descriptor from immediates instead of loading it
 * from memory.
 *
 * b        builder positioned where the descriptor should be emitted
 * addr_lo  value used as dword 0 of the descriptor
 * sel      shader selector; supplies the chip generation, address32_hi and
 *          constbuf0_num_slots
 *
 * Returns a vec4: (addr_lo, address32_hi, constbuf0_num_slots * 16, rsrc3).
 */
static nir_ssa_def *load_ubo_desc_fast_path(nir_builder *b, nir_ssa_def *addr_lo,
                                            struct si_shader_selector *sel)
{
   nir_ssa_def *addr_hi =
      nir_imm_int(b, S_008F04_BASE_ADDRESS_HI(sel->screen->info.address32_hi));

   /* The format related bits of dword 3 are encoded differently per generation. */
   uint32_t format_bits;
   if (sel->screen->info.gfx_level >= GFX11) {
      format_bits = S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
   } else if (sel->screen->info.gfx_level >= GFX10) {
      format_bits = S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
                    S_008F0C_RESOURCE_LEVEL(1);
   } else {
      format_bits = S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
   }

   /* Identity XYZW destination swizzle, common to all generations. */
   const uint32_t dst_sel =
      S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
      S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
   const uint32_t rsrc3 = dst_sel | format_bits;

   nir_ssa_def *num_records = nir_imm_int(b, sel->info.constbuf0_num_slots * 16);
   nir_ssa_def *dword3 = nir_imm_int(b, rsrc3);

   return nir_vec4(b, addr_lo, addr_hi, num_records, dword3);
}
|
||||
|
||||
/*
 * Restrict a dynamic index to the range [0, max - 1].
 *
 * When max is a power of two (or zero) the index is masked with max - 1;
 * otherwise it is compared against max - 1 (unsigned) and replaced by
 * max - 1 when it is larger.
 */
static nir_ssa_def *clamp_index(nir_builder *b, nir_ssa_def *index, unsigned max)
{
   const unsigned last = max - 1;

   if (util_is_power_of_two_or_zero(max))
      return nir_iand_imm(b, index, last);

   nir_ssa_def *upper = nir_imm_int(b, last);
   nir_ssa_def *in_range = nir_uge(b, upper, index);

   return nir_bcsel(b, in_range, index, upper);
}
|
||||
|
||||
/*
 * Produce the descriptor for the UBO selected by "index".
 *
 * With exactly one UBO and no SSBOs the descriptor is built from immediates
 * by load_ubo_desc_fast_path(). Otherwise the index is clamped to the number
 * of UBOs, offset by SI_NUM_SHADER_BUFFERS, scaled by 16 (shift left by 4)
 * and used as the offset of a 4-component nir_load_smem_amd from the
 * const_and_shader_buffers argument.
 */
static nir_ssa_def *load_ubo_desc(nir_builder *b, nir_ssa_def *index,
                                  struct lower_resource_state *s)
{
   struct si_shader_selector *sel = s->shader->selector;
   const bool single_ubo_only =
      sel->info.base.num_ubos == 1 && sel->info.base.num_ssbos == 0;

   nir_ssa_def *list = ac_nir_load_arg(b, &s->args->ac, s->args->const_and_shader_buffers);

   if (single_ubo_only)
      return load_ubo_desc_fast_path(b, list, sel);

   nir_ssa_def *slot = clamp_index(b, index, sel->info.base.num_ubos);
   slot = nir_iadd_imm(b, slot, SI_NUM_SHADER_BUFFERS);

   nir_ssa_def *byte_offset = nir_ishl_imm(b, slot, 4);

   return nir_load_smem_amd(b, 4, list, byte_offset);
}
|
||||
|
||||
/*
 * Lower one intrinsic. Only load_ubo is handled: its first source (the
 * buffer index) is replaced by the descriptor from load_ubo_desc().
 *
 * Returns true when the instruction was changed, false otherwise.
 */
static bool lower_resource_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                                     struct lower_resource_state *s)
{
   if (intrin->intrinsic != nir_intrinsic_load_ubo)
      return false;

   /* Non-uniform UBO access is not expected here. */
   assert(!(nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM));

   nir_ssa_def *desc = load_ubo_desc(b, intrin->src[0].ssa, s);
   nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[0], desc);

   return true;
}
|
||||
|
||||
/*
 * Per-instruction callback for nir_shader_instructions_pass().
 *
 * Places the builder cursor before the instruction, then forwards intrinsics
 * to lower_resource_intrinsic(). Every other instruction type is left alone
 * and reported as unchanged.
 */
static bool lower_resource_instr(nir_builder *b, nir_instr *instr, void *state)
{
   struct lower_resource_state *s = state;

   b->cursor = nir_before_instr(instr);

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   return lower_resource_intrinsic(b, nir_instr_as_intrinsic(instr), s);
}
|
||||
|
||||
/*
 * Entry point of the pass: runs lower_resource_instr() over every instruction
 * of "nir", declaring dominance and block-index metadata as preserved.
 *
 * Returns the result of nir_shader_instructions_pass().
 */
bool si_nir_lower_resource(nir_shader *nir, struct si_shader *shader,
                           struct si_shader_args *args)
{
   struct lower_resource_state state;

   state.shader = shader;
   state.args = args;

   return nir_shader_instructions_pass(nir, lower_resource_instr,
                                       nir_metadata_dominance | nir_metadata_block_index,
                                       &state);
}
|
||||
|
|
@ -1886,6 +1886,8 @@ struct nir_shader *si_get_nir_shader(struct si_shader *shader, struct si_shader_
|
|||
if (sel->stage == MESA_SHADER_FRAGMENT && key->ps.mono.point_smoothing)
|
||||
NIR_PASS(progress, nir, nir_lower_point_smooth);
|
||||
|
||||
NIR_PASS(progress, nir, si_nir_lower_resource, shader, args);
|
||||
|
||||
bool is_last_vgt_stage =
|
||||
(sel->stage == MESA_SHADER_VERTEX ||
|
||||
sel->stage == MESA_SHADER_TESS_EVAL ||
|
||||
|
|
|
|||
|
|
@ -189,6 +189,10 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
|
|||
/* si_nir_lower_abi.c */
|
||||
bool si_nir_lower_abi(nir_shader *nir, struct si_shader *shader, struct si_shader_args *args);
|
||||
|
||||
/* si_nir_lower_resource.c */
|
||||
bool si_nir_lower_resource(nir_shader *nir, struct si_shader *shader,
|
||||
struct si_shader_args *args);
|
||||
|
||||
/* si_shader_llvm.c */
|
||||
bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
|
||||
struct ac_shader_config *conf, struct ac_llvm_compiler *compiler,
|
||||
|
|
|
|||
|
|
@ -53,60 +53,6 @@ static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx, LLVMValue
|
|||
return index;
|
||||
}
|
||||
|
||||
static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->args->const_and_shader_buffers);
|
||||
struct si_shader_selector *sel = ctx->shader->selector;
|
||||
|
||||
/* Do the bounds checking with a descriptor, because
|
||||
* doing computation and manual bounds checking of 64-bit
|
||||
* addresses generates horrible VALU code with very high
|
||||
* VGPR usage and very low SIMD occupancy.
|
||||
*/
|
||||
ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");
|
||||
|
||||
LLVMValueRef desc0, desc1;
|
||||
desc0 = ptr;
|
||||
desc1 = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
|
||||
|
||||
uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
|
||||
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
|
||||
|
||||
if (ctx->screen->info.gfx_level >= GFX11)
|
||||
rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
|
||||
else if (ctx->screen->info.gfx_level >= GFX10)
|
||||
rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
|
||||
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
|
||||
else
|
||||
rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
||||
|
||||
LLVMValueRef desc_elems[] = {desc0, desc1,
|
||||
LLVMConstInt(ctx->ac.i32, sel->info.constbuf0_num_slots * 16, 0),
|
||||
LLVMConstInt(ctx->ac.i32, rsrc3, false)};
|
||||
|
||||
return ac_build_gather_values(&ctx->ac, desc_elems, 4);
|
||||
}
|
||||
|
||||
static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
struct si_shader_selector *sel = ctx->shader->selector;
|
||||
|
||||
if (sel->info.base.num_ubos == 1 && sel->info.base.num_ssbos == 0) {
|
||||
return load_const_buffer_desc_fast_path(ctx);
|
||||
}
|
||||
|
||||
index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
|
||||
index =
|
||||
LLVMBuildAdd(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, SI_NUM_SHADER_BUFFERS, 0), "");
|
||||
|
||||
return ac_build_load_to_sgpr(&ctx->ac,
|
||||
ac_get_ptr_arg(&ctx->ac, &ctx->args->ac, ctx->args->const_and_shader_buffers),
|
||||
index);
|
||||
}
|
||||
|
||||
static LLVMValueRef load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write, bool non_uniform)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
|
||||
|
|
@ -335,7 +281,6 @@ static LLVMValueRef si_nir_load_sampler_desc(struct ac_shader_abi *abi, unsigned
|
|||
|
||||
void si_llvm_init_resource_callbacks(struct si_shader_context *ctx)
|
||||
{
|
||||
ctx->abi.load_ubo = load_ubo;
|
||||
ctx->abi.load_ssbo = load_ssbo;
|
||||
ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue