radeonsi: Move buffer descriptor slot to the beginning
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Move the buffer descriptor from dwords [4:7] to dwords [0:3] of the
16-dword image+sampler slot in si_descriptors.c, and update the shader
descriptor loads to match.

Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34491>
This commit is contained in:
Saroj Kumar 2025-04-03 10:30:13 +05:30 committed by Marge Bot
parent dc70e1c198
commit 384bf8e58e
5 changed files with 17 additions and 31 deletions

View file

@ -26,8 +26,8 @@
* Possible scenarios for one 16 dword image+sampler slot:
*
* | Image | w/ FMASK | Buffer | NULL
* [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3]
* [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0
* [ 0: 3] Image[0:3] | Image[0:3] | Buffer[0:3] | Null[0:3]
* [ 4: 7] Image[4:7] | Image[4:7] | unused | 0
* [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3]
* [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
*
@ -52,22 +52,12 @@
* For images, all fields must be zero except for the swizzle, which
* supports arbitrary combinations of 0s and 1s. The texture type must be
* any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
*
* For buffers, all fields must be zero. If they are not, the hw hangs.
*
* This is the only reason why the buffer descriptor must be in words [4:7].
*/
static uint32_t null_texture_descriptor[8] = {
0, 0, 0, S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) | S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
/* the rest must contain zeros, which is also used by the buffer
* descriptor */
};
static uint32_t null_image_descriptor[8] = {
0, 0, 0, S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
/* the rest must contain zeros, which is also used by the buffer
* descriptor */
};
static uint32_t null_image_descriptor[8] = {0};
static uint64_t si_desc_extract_buffer_address(const uint32_t *desc)
{
@ -370,7 +360,7 @@ static void si_set_sampler_view_desc(struct si_context *sctx, struct si_sampler_
if (tex->buffer.b.b.target == PIPE_BUFFER) {
memcpy(desc, sview->state, 8 * 4);
memcpy(desc + 8, null_texture_descriptor, 4 * 4); /* Disable FMASK. */
si_set_buf_desc_address(&tex->buffer, sview->base.u.buf.offset, desc + 4);
si_set_buf_desc_address(&tex->buffer, sview->base.u.buf.offset, desc);
return;
}
@ -685,7 +675,7 @@ static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_i
si_make_buffer_descriptor(screen, res, view->format, view->u.buf.offset, elements,
desc);
si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
si_set_buf_desc_address(res, view->u.buf.offset, desc);
} else {
static const unsigned char swizzle[4] = {0, 1, 2, 3};
struct si_texture *tex = (struct si_texture *)res;
@ -1755,7 +1745,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
unsigned desc_slot = si_get_sampler_slot(i);
si_set_buf_desc_address(si_resource(buffer), samplers->views[i]->u.buf.offset,
descs->list + desc_slot * 16 + 4);
descs->list + desc_slot * 16);
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
if (shader != PIPE_SHADER_COMPUTE)
si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
@ -1787,7 +1777,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
si_mark_image_range_valid(&images->views[i]);
si_set_buf_desc_address(si_resource(buffer), images->views[i].u.buf.offset,
descs->list + desc_slot * 8 + 4);
descs->list + desc_slot * 8);
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
if (shader != PIPE_SHADER_COMPUTE)
si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers);
@ -1814,7 +1804,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset,
descs->list + desc_slot * 16 + 4);
descs->list + desc_slot * 16);
(*tex_handle)->desc_dirty = true;
si_mark_bindless_descriptors_dirty(sctx);
@ -1839,7 +1829,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
si_mark_image_range_valid(view);
si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset,
descs->list + desc_slot * 16 + 4);
descs->list + desc_slot * 16);
(*img_handle)->desc_dirty = true;
si_mark_bindless_descriptors_dirty(sctx);
@ -2452,10 +2442,8 @@ void si_emit_compute_shader_pointers(struct si_context *sctx)
unsigned num_sgprs = 8;
/* Image buffers are in desc[0..3], so only 4 dwords are emitted. */
if (BITSET_TEST(shader->info.base.image_buffers, i)) {
desc_offset += 4;
if (BITSET_TEST(shader->info.base.image_buffers, i))
num_sgprs = 4;
}
radeon_emit_array(&desc->list[desc_offset], num_sgprs);
}
@ -2552,7 +2540,7 @@ static void si_update_bindless_buffer_descriptor(struct si_context *sctx, unsign
struct si_descriptors *desc = &sctx->bindless_descriptors;
struct si_resource *buf = si_resource(resource);
unsigned desc_slot_offset = desc_slot * 16;
uint32_t *desc_list = desc->list + desc_slot_offset + 4;
uint32_t *desc_list = desc->list + desc_slot_offset;
uint64_t old_desc_va;
assert(resource->target == PIPE_BUFFER);

View file

@ -145,7 +145,6 @@ static nir_def *load_image_desc(nir_builder *b, nir_def *list, nir_def *index,
unsigned num_channels;
if (desc_type == AC_DESC_BUFFER) {
offset = nir_iadd_imm(b, offset, 16);
num_channels = 4;
} else {
assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_FMASK);
@ -410,8 +409,7 @@ static nir_def *load_sampler_desc(nir_builder *b, nir_def *list, nir_def *index,
num_channels = 8;
break;
case AC_DESC_BUFFER:
/* The buffer is in [4:7]. */
offset = nir_iadd_imm(b, offset, 16);
/* The buffer is in [0:3]. */
num_channels = 4;
break;
case AC_DESC_FMASK:

View file

@ -677,8 +677,7 @@ struct si_compute {
struct si_sampler_view {
struct pipe_sampler_view base;
/* [0..7] = image descriptor
* [4..7] = buffer descriptor */
/* [0..7] = image or buffer descriptor */
uint32_t state[8];
uint32_t fmask_state[8];
const struct legacy_surf_level *base_level_info;

View file

@ -491,6 +491,7 @@ static LLVMValueRef si_llvm_load_sampler_desc(struct ac_shader_abi *abi, LLVMVal
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
LLVMBuilderRef builder = ctx->ac.builder;
/* This is only used by divergent sampler and image indexing to build the waterfall loop. */
if (index && LLVMTypeOf(index) == ctx->ac.i32) {
bool is_vec4 = false;
@ -500,8 +501,8 @@ static LLVMValueRef si_llvm_load_sampler_desc(struct ac_shader_abi *abi, LLVMVal
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, 2, 0), "");
break;
case AC_DESC_BUFFER:
/* The buffer is in [4:7]. */
index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0), ctx->ac.i32_1);
/* The buffer is in [0:3]. */
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, 4, 0), "");
is_vec4 = true;
break;
case AC_DESC_FMASK:

View file

@ -3678,7 +3678,7 @@ void si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf
.gfx10_oob_select = V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET,
};
ac_build_buffer_descriptor(screen->info.gfx_level, &buffer_state, &state[4]);
ac_build_buffer_descriptor(screen->info.gfx_level, &buffer_state, &state[0]);
}
/**