mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 06:30:10 +01:00
broadcom/compiler: use unifa for UBO loads from uniform addresses
This basically processes UBO loads as uniform loads by writing the load address to the unifa register and reading sequential values with ldunifa. This process is faster than going through the TMU, but we can only use it when the address we are reading from is uniform across all channels, since we are basically reading from the UBO address as if it was a uniform stream. This leads to better performance in the UE4 Shooter demo. Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8980>
This commit is contained in:
parent
878555976e
commit
82981ccbb1
1 changed files with 40 additions and 1 deletions
|
|
@ -2564,6 +2564,42 @@ ntq_emit_load_interpolated_input(struct v3d_compile *c,
|
|||
return vir_FADD(c, vir_FMUL(c, pInterp, wInterp), C);
|
||||
}
|
||||
|
||||
static void
|
||||
ntq_emit_load_ubo_unifa(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
{
|
||||
bool dynamic_src = !nir_src_is_const(instr->src[1]);
|
||||
uint32_t const_offset =
|
||||
dynamic_src ? 0 : nir_src_as_uint(instr->src[1]);
|
||||
|
||||
/* On OpenGL QUNIFORM_UBO_ADDR takes a UBO index
|
||||
* shifted up by 1 (0 is gallium's constant buffer 0).
|
||||
*/
|
||||
uint32_t index = nir_src_as_uint(instr->src[0]);
|
||||
if (c->key->environment == V3D_ENVIRONMENT_OPENGL)
|
||||
index++;
|
||||
|
||||
struct qreg base_offset =
|
||||
vir_uniform(c, QUNIFORM_UBO_ADDR,
|
||||
v3d_unit_data_create(index, const_offset));
|
||||
const_offset = 0;
|
||||
|
||||
struct qreg unifa = vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_UNIFA);
|
||||
if (!dynamic_src) {
|
||||
vir_MOV_dest(c, unifa, base_offset);
|
||||
} else {
|
||||
vir_ADD_dest(c, unifa, base_offset,
|
||||
ntq_get_src(c, instr->src[1], 0));
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < nir_intrinsic_dest_components(instr); i++) {
|
||||
struct qinst *ldunifa =
|
||||
vir_add_inst(V3D_QPU_A_NOP, c->undef, c->undef, c->undef);
|
||||
ldunifa->qpu.sig.ldunifa = true;
|
||||
struct qreg data = vir_emit_def(c, ldunifa);
|
||||
ntq_store_dest(c, &instr->dest, i, vir_MOV(c, data));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
||||
{
|
||||
|
|
@ -2573,7 +2609,10 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
|
|||
break;
|
||||
|
||||
case nir_intrinsic_load_ubo:
|
||||
ntq_emit_tmu_general(c, instr, false);
|
||||
if (!nir_src_is_divergent(instr->src[1]))
|
||||
ntq_emit_load_ubo_unifa(c, instr);
|
||||
else
|
||||
ntq_emit_tmu_general(c, instr, false);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_ssbo_atomic_add:
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue