diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index d4a42fdce47..a0ac54fd283 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -26,8 +26,8 @@ * Possible scenarios for one 16 dword image+sampler slot: * * | Image | w/ FMASK | Buffer | NULL - * [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3] - * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0 + * [ 0: 3] Image[0:3] | Image[0:3] | Buffer[0:3] | Null[0:3] + * [ 4: 7] Image[4:7] | Image[4:7] | Buffer[4:7] | 0 * [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3] * [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3] * @@ -52,22 +52,12 @@ * For images, all fields must be zero except for the swizzle, which * supports arbitrary combinations of 0s and 1s. The texture type must be * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs. - * - * For buffers, all fields must be zero. If they are not, the hw hangs. - * - * This is the only reason why the buffer descriptor must be in words [4:7]. */ static uint32_t null_texture_descriptor[8] = { 0, 0, 0, S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) | S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D) - /* the rest must contain zeros, which is also used by the buffer - * descriptor */ }; -static uint32_t null_image_descriptor[8] = { - 0, 0, 0, S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D) - /* the rest must contain zeros, which is also used by the buffer - * descriptor */ -}; +static uint32_t null_image_descriptor[8] = {0}; static uint64_t si_desc_extract_buffer_address(const uint32_t *desc) { @@ -370,7 +360,7 @@ static void si_set_sampler_view_desc(struct si_context *sctx, struct si_sampler_ if (tex->buffer.b.b.target == PIPE_BUFFER) { memcpy(desc, sview->state, 8 * 4); memcpy(desc + 8, null_texture_descriptor, 4 * 4); /* Disable FMASK. 
*/ - si_set_buf_desc_address(&tex->buffer, sview->base.u.buf.offset, desc + 4); + si_set_buf_desc_address(&tex->buffer, sview->base.u.buf.offset, desc); return; } @@ -685,7 +675,7 @@ static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_i si_make_buffer_descriptor(screen, res, view->format, view->u.buf.offset, elements, desc); - si_set_buf_desc_address(res, view->u.buf.offset, desc + 4); + si_set_buf_desc_address(res, view->u.buf.offset, desc); } else { static const unsigned char swizzle[4] = {0, 1, 2, 3}; struct si_texture *tex = (struct si_texture *)res; @@ -1755,7 +1745,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) unsigned desc_slot = si_get_sampler_slot(i); si_set_buf_desc_address(si_resource(buffer), samplers->views[i]->u.buf.offset, - descs->list + desc_slot * 16 + 4); + descs->list + desc_slot * 16); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); if (shader != PIPE_SHADER_COMPUTE) si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers); @@ -1787,7 +1777,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) si_mark_image_range_valid(&images->views[i]); si_set_buf_desc_address(si_resource(buffer), images->views[i].u.buf.offset, - descs->list + desc_slot * 8 + 4); + descs->list + desc_slot * 8); sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader); if (shader != PIPE_SHADER_COMPUTE) si_mark_atom_dirty(sctx, &sctx->atoms.s.gfx_shader_pointers); @@ -1814,7 +1804,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) { si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, - descs->list + desc_slot * 16 + 4); + descs->list + desc_slot * 16); (*tex_handle)->desc_dirty = true; si_mark_bindless_descriptors_dirty(sctx); @@ -1839,7 +1829,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf) 
si_mark_image_range_valid(view); si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset, - descs->list + desc_slot * 16 + 4); + descs->list + desc_slot * 16); (*img_handle)->desc_dirty = true; si_mark_bindless_descriptors_dirty(sctx); @@ -2452,10 +2442,8 @@ void si_emit_compute_shader_pointers(struct si_context *sctx) unsigned num_sgprs = 8; - /* Image buffers are in desc[4..7]. */ - if (BITSET_TEST(shader->info.base.image_buffers, i)) { - desc_offset += 4; + /* Image buffers are in desc[0..3]. */ + if (BITSET_TEST(shader->info.base.image_buffers, i)) num_sgprs = 4; - } radeon_emit_array(&desc->list[desc_offset], num_sgprs); } @@ -2552,7 +2540,7 @@ static void si_update_bindless_buffer_descriptor(struct si_context *sctx, unsign struct si_descriptors *desc = &sctx->bindless_descriptors; struct si_resource *buf = si_resource(resource); unsigned desc_slot_offset = desc_slot * 16; - uint32_t *desc_list = desc->list + desc_slot_offset + 4; + uint32_t *desc_list = desc->list + desc_slot_offset; uint64_t old_desc_va; assert(resource->target == PIPE_BUFFER); diff --git a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c index e334a016b90..462ecc0cba8 100644 --- a/src/gallium/drivers/radeonsi/si_nir_lower_resource.c +++ b/src/gallium/drivers/radeonsi/si_nir_lower_resource.c @@ -145,7 +145,6 @@ static nir_def *load_image_desc(nir_builder *b, nir_def *list, nir_def *index, unsigned num_channels; if (desc_type == AC_DESC_BUFFER) { - offset = nir_iadd_imm(b, offset, 16); num_channels = 4; } else { assert(desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_FMASK); @@ -410,8 +409,7 @@ static nir_def *load_sampler_desc(nir_builder *b, nir_def *list, nir_def *index, num_channels = 8; break; case AC_DESC_BUFFER: - /* The buffer is in [4:7]. */ - offset = nir_iadd_imm(b, offset, 16); + /* The buffer is in [0:3]. 
*/ num_channels = 4; break; case AC_DESC_FMASK: diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h index a9207b3e20a..d4534bb9a57 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.h +++ b/src/gallium/drivers/radeonsi/si_pipe.h @@ -677,8 +677,7 @@ struct si_compute { struct si_sampler_view { struct pipe_sampler_view base; - /* [0..7] = image descriptor - * [4..7] = buffer descriptor */ + /* [0..7] = image descriptor, [0..3] = buffer descriptor */ uint32_t state[8]; uint32_t fmask_state[8]; const struct legacy_surf_level *base_level_info; diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 75b2fb049c3..b42cbf0591a 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -491,6 +491,7 @@ static LLVMValueRef si_llvm_load_sampler_desc(struct ac_shader_abi *abi, LLVMVal struct si_shader_context *ctx = si_shader_context_from_abi(abi); LLVMBuilderRef builder = ctx->ac.builder; + /* This is only used by divergent sampler and image indexing to build the waterfall loop. */ if (index && LLVMTypeOf(index) == ctx->ac.i32) { bool is_vec4 = false; @@ -500,8 +501,8 @@ static LLVMValueRef si_llvm_load_sampler_desc(struct ac_shader_abi *abi, LLVMVal index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, 2, 0), ""); break; case AC_DESC_BUFFER: - /* The buffer is in [4:7]. */ - index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->ac.i32, 4, 0), ctx->ac.i32_1); + /* The buffer is in [0:3]. 
*/ + index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, 4, 0), ""); is_vec4 = true; break; case AC_DESC_FMASK: diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index b8ac2865c88..8061be47aa7 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -3678,7 +3678,7 @@ void si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf .gfx10_oob_select = V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET, }; - ac_build_buffer_descriptor(screen->info.gfx_level, &buffer_state, &state[4]); + ac_build_buffer_descriptor(screen->info.gfx_level, &buffer_state, &state[0]); } /**