freedreno/ir3: Track # of app UBOs

Before the gallium driver can support load_shader_consts_via_preamble it
needs a way to differentiate between the # of API-level UBOs and the number
of UBOs after the driver inserts ones for driver-params, etc.  This is
because the driver UBOs are emitted in a different place from user UBOs.
Without this distinction, if the app had more UBOs bound than the shader
used, we would emit some conflicting UBO descriptors (i.e. ones for unused
const slots conflicting with the driver UBO slots).

This also moves the consts_ubo emit into the PROG state.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31534>
This commit is contained in:
Rob Clark 2024-10-03 11:48:56 -07:00 committed by Marge Bot
parent 94c3c39f21
commit 97ab362914
3 changed files with 37 additions and 10 deletions

View file

@ -397,6 +397,9 @@ create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,
v->cs.force_linear_dispatch = shader->cs.force_linear_dispatch;
}
struct ir3_const_state *const_state = ir3_const_state_mut(v);
const_state->num_app_ubos = MAX2(1, shader->nir->info.num_ubos);
if (!compile_variant(shader, v))
goto fail;

View file

@ -215,6 +215,7 @@ struct ir3_driver_ubo {
*/
struct ir3_const_state {
unsigned num_ubos;
unsigned num_app_ubos; /* # of UBOs not including driver UBOs */
unsigned num_driver_params; /* scalar */
struct ir3_driver_ubo consts_ubo;

View file

@ -14,6 +14,28 @@
#define emit_const_bo fd6_emit_const_bo
#include "ir3_const.h"
static inline void
fd6_emit_driver_ubo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
int base, uint32_t sizedwords, unsigned buffer_offset,
struct fd_bo *bo)
{
enum a6xx_state_block block = fd6_stage2shadersb(v->type);
/* base == ubo idx */
OUT_PKT7(ring, fd6_stage2opcode(v->type), 5);
OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(base) |
CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO) |
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
CP_LOAD_STATE6_0_STATE_BLOCK(block) |
CP_LOAD_STATE6_0_NUM_UNIT(1));
OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
int size_vec4s = DIV_ROUND_UP(sizedwords, 4);
OUT_RELOC(ring, bo, buffer_offset,
((uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32), 0);
}
/* regid: base const register
* prsc or dwords: buffer containing constant values
* sizedwords: size of const value buffer
@ -190,7 +212,7 @@ fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
struct fd_constbuf_stateobj *constbuf)
{
const struct ir3_const_state *const_state = ir3_const_state(v);
int num_ubos = const_state->num_ubos;
int num_ubos = const_state->num_app_ubos;
if (!num_ubos)
return;
@ -205,14 +227,6 @@ fd6_emit_ubos(const struct ir3_shader_variant *v, struct fd_ringbuffer *ring,
OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
for (int i = 0; i < num_ubos; i++) {
/* NIR constant data is packed into the end of the shader. */
if (i == const_state->consts_ubo.idx) {
int size_vec4s = DIV_ROUND_UP(v->constant_data_size, 16);
OUT_RELOC(ring, v->bo, v->info.constant_data_offset,
(uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32, 0);
continue;
}
struct pipe_constant_buffer *cb = &constbuf->cb[i];
if (cb->buffer) {
@ -241,7 +255,7 @@ fd6_user_consts_cmdstream_size(const struct ir3_shader_variant *v)
/* also account for UBO addresses: */
packets += 1;
size += 2 * const_state->num_ubos;
size += 2 * const_state->num_app_ubos;
unsigned sizedwords = (4 * packets) + size;
return sizedwords * 4;
@ -360,6 +374,15 @@ void
fd6_emit_immediates(const struct ir3_shader_variant *v,
struct fd_ringbuffer *ring)
{
const struct ir3_const_state *const_state = ir3_const_state(v);
if (const_state->consts_ubo.idx >= 0) {
int sizedwords = DIV_ROUND_UP(v->constant_data_size, 4);
fd6_emit_driver_ubo(ring, v, const_state->consts_ubo.idx, sizedwords,
v->info.constant_data_offset, v->bo);
}
ir3_emit_immediates(v, ring);
}