mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-04-26 22:00:37 +02:00
radeonsi: move CS user SGPR layout determination into si_shader_variant_info
Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38802>
This commit is contained in:
parent
1bff4115e7
commit
04e6e70e69
8 changed files with 82 additions and 72 deletions
|
|
@ -47,49 +47,6 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
|
|||
program->shader.is_monolithic = true;
|
||||
program->shader.wave_size = si_determine_wave_size(sscreen, &program->shader);
|
||||
|
||||
/* Variable block sizes need 10 bits (1 + log2(SI_MAX_VARIABLE_THREADS_PER_BLOCK)) per dim.
|
||||
* We pack them into a single user SGPR.
|
||||
*/
|
||||
unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS + (sel->info.uses_sysval_num_workgroups ? 3 : 0) +
|
||||
(sel->info.uses_sysval_workgroup_size ? 1 : 0) +
|
||||
sel->nir->info.cs.user_data_components_amd;
|
||||
|
||||
if (sel->stage != MESA_SHADER_TASK) {
|
||||
/* Fast path for compute shaders - some descriptors passed via user SGPRs. */
|
||||
/* Shader buffers in user SGPRs. */
|
||||
for (unsigned i = 0; i < MIN2(3, sel->nir->info.num_ssbos) && user_sgprs <= 12; i++) {
|
||||
user_sgprs = align(user_sgprs, 4);
|
||||
if (i == 0)
|
||||
sel->cs_shaderbufs_sgpr_index = user_sgprs;
|
||||
user_sgprs += 4;
|
||||
sel->cs_num_shaderbufs_in_user_sgprs++;
|
||||
}
|
||||
|
||||
/* Images in user SGPRs. */
|
||||
unsigned non_fmask_images = BITFIELD_MASK(sel->nir->info.num_images);
|
||||
|
||||
/* Remove images with FMASK from the bitmask. We only care about the first
|
||||
* 3 anyway, so we can take msaa_images[0] and ignore the rest.
|
||||
*/
|
||||
if (sscreen->info.gfx_level < GFX11)
|
||||
non_fmask_images &= ~sel->nir->info.msaa_images[0];
|
||||
|
||||
for (unsigned i = 0; i < 3 && non_fmask_images & (1 << i); i++) {
|
||||
unsigned num_sgprs = BITSET_TEST(sel->nir->info.image_buffers, i) ? 4 : 8;
|
||||
|
||||
if (align(user_sgprs, num_sgprs) + num_sgprs > 16)
|
||||
break;
|
||||
|
||||
user_sgprs = align(user_sgprs, num_sgprs);
|
||||
if (i == 0)
|
||||
sel->cs_images_sgpr_index = user_sgprs;
|
||||
user_sgprs += num_sgprs;
|
||||
sel->cs_num_images_in_user_sgprs++;
|
||||
}
|
||||
sel->cs_images_num_sgprs = user_sgprs - sel->cs_images_sgpr_index;
|
||||
}
|
||||
assert(user_sgprs <= 16);
|
||||
|
||||
unsigned char ir_sha1_cache_key[20];
|
||||
si_get_ir_cache_key(sel, false, false, shader->wave_size, ir_sha1_cache_key);
|
||||
|
||||
|
|
@ -114,12 +71,6 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
|
|||
return;
|
||||
}
|
||||
|
||||
/* task ring entry and draw id
|
||||
* note uses_draw_id is only available after shader variant creation
|
||||
*/
|
||||
if (sel->stage == MESA_SHADER_TASK)
|
||||
user_sgprs += shader->info.uses_sysval_draw_id ? 3 : 2;
|
||||
|
||||
shader->config.rsrc1 = S_00B848_VGPRS(si_shader_encode_vgprs(shader)) |
|
||||
S_00B848_SGPRS(si_shader_encode_sgprs(shader)) |
|
||||
S_00B848_DX10_CLAMP(sscreen->info.gfx_level < GFX12) |
|
||||
|
|
@ -128,7 +79,7 @@ static void si_create_compute_state_async(void *job, void *gdata, int thread_ind
|
|||
/* This is needed for CWSR, but it causes halts to work differently. */
|
||||
S_00B848_PRIV(sscreen->info.gfx_level == GFX11);
|
||||
|
||||
shader->config.rsrc2 = S_00B84C_USER_SGPR(user_sgprs) |
|
||||
shader->config.rsrc2 = S_00B84C_USER_SGPR(shader->info.cs_num_user_sgprs) |
|
||||
S_00B84C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) |
|
||||
S_00B84C_TGID_X_EN(sel->info.uses_sysval_workgroup_id[0]) |
|
||||
S_00B84C_TGID_Y_EN(sel->info.uses_sysval_workgroup_id[1]) |
|
||||
|
|
|
|||
|
|
@ -836,7 +836,7 @@ static void si_set_shader_images(struct pipe_context *pipe, mesa_shader_stage sh
|
|||
|
||||
if (shader == MESA_SHADER_COMPUTE &&
|
||||
ctx->cs_shader_state.program &&
|
||||
start_slot < ctx->cs_shader_state.program->sel.cs_num_images_in_user_sgprs)
|
||||
start_slot < ctx->cs_shader_state.program->shader.info.cs_num_images_in_user_sgprs)
|
||||
ctx->compute_image_sgprs_dirty = true;
|
||||
|
||||
si_update_shader_needs_decompress_mask(ctx, shader);
|
||||
|
|
@ -1343,7 +1343,7 @@ void si_set_shader_buffers(struct pipe_context *ctx, mesa_shader_stage shader,
|
|||
|
||||
if (shader == MESA_SHADER_COMPUTE &&
|
||||
sctx->cs_shader_state.program &&
|
||||
start_slot < sctx->cs_shader_state.program->sel.cs_num_shaderbufs_in_user_sgprs)
|
||||
start_slot < sctx->cs_shader_state.program->shader.info.cs_num_shaderbufs_in_user_sgprs)
|
||||
sctx->compute_shaderbuf_sgprs_dirty = true;
|
||||
|
||||
for (i = 0; i < count; ++i) {
|
||||
|
|
@ -2442,14 +2442,14 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
|
|||
radeon_begin(&sctx->gfx_cs);
|
||||
|
||||
/* Set shader buffer descriptors in user SGPRs. */
|
||||
struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel;
|
||||
unsigned num_shaderbufs = shader->cs_num_shaderbufs_in_user_sgprs;
|
||||
struct si_shader *shader = &sctx->cs_shader_state.program->shader;
|
||||
unsigned num_shaderbufs = shader->info.cs_num_shaderbufs_in_user_sgprs;
|
||||
|
||||
if (num_shaderbufs && sctx->compute_shaderbuf_sgprs_dirty) {
|
||||
struct si_descriptors *desc = si_const_and_shader_buffer_descriptors(sctx, MESA_SHADER_COMPUTE);
|
||||
|
||||
radeon_set_sh_reg_seq(R_00B900_COMPUTE_USER_DATA_0 +
|
||||
shader->cs_shaderbufs_sgpr_index * 4,
|
||||
shader->info.cs_shaderbufs_sgpr_index * 4,
|
||||
num_shaderbufs * 4);
|
||||
|
||||
for (unsigned i = 0; i < num_shaderbufs; i++)
|
||||
|
|
@ -2459,20 +2459,20 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
|
|||
}
|
||||
|
||||
/* Set image descriptors in user SGPRs. */
|
||||
unsigned num_images = shader->cs_num_images_in_user_sgprs;
|
||||
unsigned num_images = shader->info.cs_num_images_in_user_sgprs;
|
||||
if (num_images && sctx->compute_image_sgprs_dirty) {
|
||||
struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, MESA_SHADER_COMPUTE);
|
||||
|
||||
radeon_set_sh_reg_seq(R_00B900_COMPUTE_USER_DATA_0 +
|
||||
shader->cs_images_sgpr_index * 4,
|
||||
shader->cs_images_num_sgprs);
|
||||
shader->info.cs_images_sgpr_index * 4,
|
||||
shader->info.cs_images_num_sgprs);
|
||||
|
||||
for (unsigned i = 0; i < num_images; i++) {
|
||||
unsigned desc_offset = si_get_image_slot(i) * 8;
|
||||
unsigned num_sgprs = 8;
|
||||
|
||||
/* Image buffers are in desc[4..7]. */
|
||||
if (shader->info.base.image_buffers & BITFIELD_BIT(i))
|
||||
if (shader->info.cs_image_buffer_mask & BITFIELD_BIT(i))
|
||||
num_sgprs = 4;
|
||||
|
||||
radeon_emit_array(&desc->list[desc_offset], num_sgprs);
|
||||
|
|
|
|||
|
|
@ -83,12 +83,10 @@ static nir_def *load_ubo_desc(nir_builder *b, nir_def *index,
|
|||
static nir_def *load_ssbo_desc(nir_builder *b, nir_src *index,
|
||||
struct lower_resource_state *s)
|
||||
{
|
||||
struct si_shader_selector *sel = s->shader->selector;
|
||||
|
||||
/* Fast path if the shader buffer is in user SGPRs. */
|
||||
if (nir_src_is_const(*index)) {
|
||||
unsigned slot = nir_src_as_uint(*index);
|
||||
if (slot < sel->cs_num_shaderbufs_in_user_sgprs)
|
||||
if (slot < s->shader->info.cs_num_shaderbufs_in_user_sgprs)
|
||||
return ac_nir_load_arg(b, &s->args->ac, s->args->cs_shaderbuf[slot]);
|
||||
}
|
||||
|
||||
|
|
@ -228,7 +226,7 @@ static nir_def *load_deref_image_desc(nir_builder *b, nir_deref_instr *deref,
|
|||
|
||||
nir_def *desc;
|
||||
if (!dynamic_index && desc_type != AC_DESC_FMASK &&
|
||||
const_index < s->shader->selector->cs_num_images_in_user_sgprs) {
|
||||
const_index < s->shader->info.cs_num_images_in_user_sgprs) {
|
||||
/* Fast path if the image is in user SGPRs. */
|
||||
desc = ac_nir_load_arg(b, &s->args->ac, s->args->cs_image[const_index]);
|
||||
|
||||
|
|
|
|||
|
|
@ -498,11 +498,6 @@ struct si_shader_selector {
|
|||
|
||||
uint8_t const_and_shader_buf_descriptors_index;
|
||||
uint8_t sampler_and_images_descriptors_index;
|
||||
uint8_t cs_shaderbufs_sgpr_index;
|
||||
uint8_t cs_num_shaderbufs_in_user_sgprs;
|
||||
uint8_t cs_images_sgpr_index;
|
||||
uint8_t cs_images_num_sgprs;
|
||||
uint8_t cs_num_images_in_user_sgprs;
|
||||
unsigned ngg_cull_vert_threshold; /* UINT32_MAX = disabled */
|
||||
enum mesa_prim rast_prim;
|
||||
|
||||
|
|
|
|||
|
|
@ -540,14 +540,14 @@ void si_init_shader_args(struct si_shader *shader, struct si_shader_args *args,
|
|||
|
||||
/* Some descriptors can be in user SGPRs. */
|
||||
/* Shader buffers in user SGPRs. */
|
||||
for (unsigned i = 0; i < shader->selector->cs_num_shaderbufs_in_user_sgprs; i++) {
|
||||
for (unsigned i = 0; i < shader->info.cs_num_shaderbufs_in_user_sgprs; i++) {
|
||||
while (args->ac.num_sgprs_used % 4 != 0)
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_VALUE, NULL);
|
||||
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 4, AC_ARG_VALUE, &args->cs_shaderbuf[i]);
|
||||
}
|
||||
/* Images in user SGPRs. */
|
||||
for (unsigned i = 0; i < shader->selector->cs_num_images_in_user_sgprs; i++) {
|
||||
for (unsigned i = 0; i < shader->info.cs_num_images_in_user_sgprs; i++) {
|
||||
unsigned num_sgprs = BITSET_TEST(info->image_buffers, i) ? 4 : 8;
|
||||
|
||||
while (args->ac.num_sgprs_used % num_sgprs != 0)
|
||||
|
|
|
|||
|
|
@ -441,7 +441,6 @@ void si_nir_gather_info(struct si_screen *sscreen, struct nir_shader *nir,
|
|||
info->base.num_ssbos = nir->info.num_ssbos;
|
||||
info->base.num_images = nir->info.num_images;
|
||||
info->base.textures_used = nir->info.textures_used[0];
|
||||
info->base.image_buffers = nir->info.image_buffers[0];
|
||||
info->base.msaa_images = nir->info.msaa_images[0];
|
||||
|
||||
info->base.task_payload_size = nir->info.task_payload_size;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ struct si_shader_info {
|
|||
uint8_t num_ssbos;
|
||||
uint8_t num_images;
|
||||
uint32_t textures_used;
|
||||
uint32_t image_buffers;
|
||||
uint32_t msaa_images;
|
||||
|
||||
unsigned task_payload_size;
|
||||
|
|
@ -231,6 +230,13 @@ struct si_shader_variant_info {
|
|||
uint8_t num_streamout_vec4s;
|
||||
uint8_t max_simd_waves;
|
||||
uint8_t ngg_lds_scratch_size;
|
||||
uint8_t cs_num_user_sgprs;
|
||||
uint8_t cs_shaderbufs_sgpr_index;
|
||||
uint8_t cs_num_shaderbufs_in_user_sgprs;
|
||||
uint8_t cs_images_sgpr_index;
|
||||
uint8_t cs_images_num_sgprs;
|
||||
uint8_t cs_num_images_in_user_sgprs;
|
||||
uint8_t cs_image_buffer_mask; /* which image bindings are buffers, only the first few bits matter */
|
||||
uint16_t private_mem_vgprs;
|
||||
uint32_t ngg_lds_vertex_size; /* VS,TES: Cull+XFB, GS: GSVS size */
|
||||
uint32_t shared_size;
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include "nir.h"
|
||||
#include "nir_range_analysis.h"
|
||||
#include "sid.h"
|
||||
#include "si_pipe.h"
|
||||
|
||||
void si_get_shader_variant_info(struct si_shader *shader,
|
||||
struct si_temp_shader_variant_info *temp_info, nir_shader *nir)
|
||||
|
|
@ -303,6 +304,66 @@ void si_get_shader_variant_info(struct si_shader *shader,
|
|||
|
||||
shader->info.clipdist_mask &= ~shader->key.ge.opt.kill_clip_distances;
|
||||
}
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_COMPUTE ||
|
||||
nir->info.stage == MESA_SHADER_KERNEL ||
|
||||
nir->info.stage == MESA_SHADER_TASK) {
|
||||
/* Determine user SGPRs for compute shader. This includes descriptors in user SGPRs.
|
||||
*
|
||||
* Variable block sizes need 10 bits (1 + log2(SI_MAX_VARIABLE_THREADS_PER_BLOCK)) per dim.
|
||||
* We pack them into a single user SGPR.
|
||||
*/
|
||||
unsigned num_user_sgprs = SI_NUM_RESOURCE_SGPRS + (shader->selector->info.uses_sysval_num_workgroups ? 3 : 0) +
|
||||
(shader->selector->info.uses_sysval_workgroup_size ? 1 : 0) +
|
||||
shader->selector->nir->info.cs.user_data_components_amd;
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_TASK) {
|
||||
/* task ring entry and draw id
|
||||
* note uses_draw_id is only available after shader variant creation
|
||||
*/
|
||||
num_user_sgprs += shader->info.uses_sysval_draw_id ? 3 : 2;
|
||||
} else {
|
||||
/* Compute shaders */
|
||||
/* Fast path for compute shaders - some descriptors passed via user SGPRs. */
|
||||
/* Shader buffers in user SGPRs. */
|
||||
for (unsigned i = 0; i < MIN2(3, nir->info.num_ssbos) && num_user_sgprs <= 12; i++) {
|
||||
num_user_sgprs = align(num_user_sgprs, 4);
|
||||
if (i == 0)
|
||||
shader->info.cs_shaderbufs_sgpr_index = num_user_sgprs;
|
||||
num_user_sgprs += 4;
|
||||
shader->info.cs_num_shaderbufs_in_user_sgprs++;
|
||||
}
|
||||
|
||||
/* Images in user SGPRs. */
|
||||
unsigned non_fmask_images = BITFIELD_MASK(nir->info.num_images);
|
||||
|
||||
/* Remove images with FMASK from the bitmask. We only care about the first
|
||||
* 3 anyway, so we can take msaa_images[0] and ignore the rest.
|
||||
*/
|
||||
if (shader->selector->screen->info.gfx_level < GFX11)
|
||||
non_fmask_images &= ~nir->info.msaa_images[0];
|
||||
|
||||
for (unsigned i = 0; i < 3 && non_fmask_images & (1 << i); i++) {
|
||||
unsigned num_sgprs = BITSET_TEST(nir->info.image_buffers, i) ? 4 : 8;
|
||||
|
||||
if (align(num_user_sgprs, num_sgprs) + num_sgprs > 16)
|
||||
break;
|
||||
|
||||
num_user_sgprs = align(num_user_sgprs, num_sgprs);
|
||||
if (i == 0)
|
||||
shader->info.cs_images_sgpr_index = num_user_sgprs;
|
||||
num_user_sgprs += num_sgprs;
|
||||
shader->info.cs_num_images_in_user_sgprs++;
|
||||
}
|
||||
|
||||
shader->info.cs_images_num_sgprs = num_user_sgprs - shader->info.cs_images_sgpr_index;
|
||||
/* Only the first few bits matter. */
|
||||
shader->info.cs_image_buffer_mask = nir->info.image_buffers[0];
|
||||
}
|
||||
|
||||
assert(num_user_sgprs <= 16);
|
||||
shader->info.cs_num_user_sgprs = num_user_sgprs;
|
||||
}
|
||||
}
|
||||
|
||||
/* Late shader variant info for AMD-specific intrinsics. */
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue