radeonsi/gfx11: pass attribute ring addr via SGPR instead of memory for blits

This removes the scalar memory load from blit vertex shaders.

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/24759>
This commit is contained in:
Marek Olšák 2023-08-14 16:17:58 -04:00 committed by Marge Bot
parent bfdff13f91
commit 1a2c12937d
9 changed files with 30 additions and 17 deletions

View file

@ -2898,9 +2898,6 @@ void si_init_all_descriptors(struct si_context *sctx)
si_get_user_data_base(sctx->gfx_level, TESS_OFF, GS_OFF,
NGG_OFF, PIPE_SHADER_GEOMETRY));
si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
si_set_ring_buffer(sctx, SI_GS_ATTRIBUTE_RING, &sctx->screen->attribute_ring->b.b,
0, ~0u, false, true, 16, 32, 0);
}
void si_release_all_descriptors(struct si_context *sctx)

View file

@ -72,7 +72,8 @@ static nir_def *build_attr_ring_desc(nir_builder *b, struct si_shader *shader,
nir_def *attr_address =
sel->stage == MESA_SHADER_VERTEX && sel->info.base.vs.blit_sgprs_amd ?
si_nir_load_internal_binding(b, args, SI_GS_ATTRIBUTE_RING, 4) :
ac_nir_load_arg_at_offset(b, &args->ac, args->vs_blit_inputs,
sel->info.base.vs.blit_sgprs_amd - 1) :
ac_nir_load_arg(b, &args->ac, args->gs_attr_address);
unsigned stride = 16 * shader->info.nr_param_exports;

View file

@ -137,15 +137,17 @@ load_vs_input_from_blit_sgpr(nir_builder *b, unsigned input_index,
out[2] = ac_nir_load_arg_at_offset(b, &s->args->ac, s->args->vs_blit_inputs, 2);
out[3] = nir_imm_float(b, 1);
} else {
bool has_attribute_ring_address = s->shader->selector->screen->info.gfx_level >= GFX11;
/* Color or texture coordinates: */
assert(input_index == 1);
unsigned vs_blit_property = s->shader->selector->info.base.vs.blit_sgprs_amd;
if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR + has_attribute_ring_address) {
for (int i = 0; i < 4; i++)
out[i] = ac_nir_load_arg_at_offset(b, &s->args->ac, s->args->vs_blit_inputs, 3 + i);
} else {
assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD);
assert(vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD + has_attribute_ring_address);
nir_def *x1 = ac_nir_load_arg_at_offset(b, &s->args->ac, s->args->vs_blit_inputs, 3);
nir_def *y1 = ac_nir_load_arg_at_offset(b, &s->args->ac, s->args->vs_blit_inputs, 4);

View file

@ -1138,7 +1138,7 @@ struct si_context {
union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */
unsigned border_color_count;
unsigned num_vs_blit_sgprs;
uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];
uint32_t vs_blit_sh_data[MAX_SI_VS_BLIT_SGPRS];
uint32_t cs_user_data[4];
/* Vertex buffers. */

View file

@ -290,24 +290,32 @@ static void declare_vs_input_vgprs(struct si_shader_args *args, struct si_shader
}
}
static void declare_vs_blit_inputs(struct si_shader_args *args, unsigned vs_blit_property)
static void declare_vs_blit_inputs(struct si_shader *shader, struct si_shader_args *args)
{
bool has_attribute_ring_address = shader->selector->screen->info.gfx_level >= GFX11;
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_blit_inputs); /* i16 x1, y1 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* i16 x2, y2 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* depth */
if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_COLOR) {
if (shader->selector->info.base.vs.blit_sgprs_amd ==
SI_VS_BLIT_SGPRS_POS_COLOR + has_attribute_ring_address) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color0 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color1 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color2 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* color3 */
} else if (vs_blit_property == SI_VS_BLIT_SGPRS_POS_TEXCOORD) {
if (has_attribute_ring_address)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* attribute ring address */
} else if (shader->selector->info.base.vs.blit_sgprs_amd ==
SI_VS_BLIT_SGPRS_POS_TEXCOORD + has_attribute_ring_address) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.x1 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.y1 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.x2 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.y2 */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.z */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL); /* texcoord.w */
if (has_attribute_ring_address)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* attribute ring address */
}
}
@ -356,7 +364,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
declare_global_desc_pointers(args);
if (sel->info.base.vs.blit_sgprs_amd) {
declare_vs_blit_inputs(args, sel->info.base.vs.blit_sgprs_amd);
declare_vs_blit_inputs(shader, args);
} else {
declare_per_stage_desc_pointers(args, shader, true);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);
@ -523,7 +531,7 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args)
}
if (stage == MESA_SHADER_VERTEX && sel->info.base.vs.blit_sgprs_amd) {
declare_vs_blit_inputs(args, sel->info.base.vs.blit_sgprs_amd);
declare_vs_blit_inputs(shader, args);
} else {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->vs_state_bits);

View file

@ -289,6 +289,8 @@ enum
SI_VS_BLIT_SGPRS_POS = 3,
SI_VS_BLIT_SGPRS_POS_COLOR = 7,
SI_VS_BLIT_SGPRS_POS_TEXCOORD = 9,
MAX_SI_VS_BLIT_SGPRS = 10, /* +1 for the attribute ring address */
};
#define SI_NGG_CULL_TRIANGLES (1 << 0) /* this implies W, view.xy, and small prim culling */

View file

@ -39,6 +39,10 @@ void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,
if (!ureg)
return NULL;
/* Add 1 for the attribute ring address. */
if (sctx->gfx_level >= GFX11 && type != UTIL_BLITTER_ATTRIB_NONE)
vs_blit_property++;
/* Tell the shader to load VS inputs from SGPRs: */
ureg_property(ureg, TGSI_PROPERTY_VS_BLIT_SGPRS_AMD, vs_blit_property);
ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, true);

View file

@ -415,11 +415,6 @@ enum
/* Aliases to reuse slots that are unused on other generations. */
SI_GS_QUERY_BUF = SI_RING_ESGS, /* gfx10+ */
/* Only u_blitter uses this (and compute should be used in most cases, so this shouldn't
* be used much). Normal draws get the address from a user SGPR.
*/
SI_GS_ATTRIBUTE_RING = SI_RING_GSVS, /* gfx11+ */
};
/* Indices into sctx->descriptors, laid out so that gfx and compute pipelines

View file

@ -2354,6 +2354,8 @@ static void si_draw_rectangle(struct blitter_context *blitter, void *vertex_elem
{
struct pipe_context *pipe = util_blitter_get_pipe(blitter);
struct si_context *sctx = (struct si_context *)pipe;
uint32_t attribute_ring_address_lo =
sctx->gfx_level >= GFX11 ? sctx->screen->attribute_ring->gpu_address : 0;
/* Pack position coordinates as signed int16. */
sctx->vs_blit_sh_data[0] = (uint32_t)(x1 & 0xffff) | ((uint32_t)(y1 & 0xffff) << 16);
@ -2363,10 +2365,12 @@ static void si_draw_rectangle(struct blitter_context *blitter, void *vertex_elem
switch (type) {
case UTIL_BLITTER_ATTRIB_COLOR:
memcpy(&sctx->vs_blit_sh_data[3], attrib->color, sizeof(float) * 4);
sctx->vs_blit_sh_data[7] = attribute_ring_address_lo;
break;
case UTIL_BLITTER_ATTRIB_TEXCOORD_XY:
case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW:
memcpy(&sctx->vs_blit_sh_data[3], &attrib->texcoord, sizeof(attrib->texcoord));
sctx->vs_blit_sh_data[9] = attribute_ring_address_lo;
break;
case UTIL_BLITTER_ATTRIB_NONE:;
}