broadcom/compiler: use skip_helpers with textures, UBOs and SSBOs
Some checks are pending
macOS-CI / macOS-CI (dri) (push) Waiting to run
macOS-CI / macOS-CI (xlib) (push) Waiting to run

Set the per-pixel mask based on the value of skip_helpers.

This slightly increases performance on several traces.

fps_avg  helped:  gl_gfxbench_trex.trace:                           22.30 -> 22.79 (2.20%)

total fps_avg in all runs: 55.18 -> 55.71 (0.97%)
total fps_avg in affected (through threshold) runs: 22.30 -> 22.79 (2.20%)
helped: 1
HURT: 0

Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>
Reviewed-by: Georg Lehmann <dadschoorse@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/38759>
This commit is contained in:
Juan A. Suarez Romero 2025-12-23 15:19:14 +01:00 committed by Marge Bot
parent a6330ed4d0
commit 13211eb2fc
3 changed files with 18 additions and 12 deletions

View file

@ -651,18 +651,14 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
v3d_tmu_get_type_from_op(tmu_op, !is_load) ==
V3D_TMU_OP_TYPE_ATOMIC;
/* Only load per-quad if we can be certain that all
* lines in the quad are active. Notice that demoted
* invocations, unlike terminated ones, are still
* active: we want to skip memory writes for them but
* loads should still work.
/* Only load per-quad if we can't skip helper
* invocations.
*/
uint32_t perquad =
is_load && !vir_in_nonuniform_control_flow(c) &&
((c->s->info.stage == MESA_SHADER_FRAGMENT &&
c->s->info.fs.needs_coarse_quad_helper_invocations &&
!c->emitted_discard) ||
c->s->info.uses_wide_subgroup_intrinsics) ?
is_load &&
c->s->info.stage == MESA_SHADER_FRAGMENT &&
nir_intrinsic_has_access(instr) &&
!(nir_intrinsic_access(instr) & ACCESS_SKIP_HELPERS) ?
GENERAL_TMU_LOOKUP_PER_QUAD :
GENERAL_TMU_LOOKUP_PER_PIXEL;
config = 0xffffff00 | tmu_op << 3 | perquad;

View file

@ -307,12 +307,15 @@ v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr)
/* To handle the cases where we can't just use p1_unpacked_default */
bool non_default_p1_config = nir_tex_instr_need_sampler(instr) ||
output_type_32_bit;
output_type_32_bit ||
(c->s->info.stage == MESA_SHADER_FRAGMENT &&
!instr->skip_helpers);
if (non_default_p1_config) {
struct V3D42_TMU_CONFIG_PARAMETER_1 p1_unpacked = {
.output_type_32_bit = output_type_32_bit,
.per_pixel_mask_enable = (c->s->info.stage != MESA_SHADER_FRAGMENT ||
instr->skip_helpers),
.unnormalized_coordinates = (instr->sampler_dim ==
GLSL_SAMPLER_DIM_RECT),
};

View file

@ -1768,6 +1768,13 @@ intrinsic_try_skip_helpers(nir_intrinsic_instr *intr, UNUSED void *data)
{
switch(intr->intrinsic) {
case nir_intrinsic_image_load:
case nir_intrinsic_load_uniform:
case nir_intrinsic_load_ubo:
case nir_intrinsic_load_ssbo:
case nir_intrinsic_load_scratch:
case nir_intrinsic_load_shared:
case nir_intrinsic_load_global:
case nir_intrinsic_load_global_constant:
return true;
default:
return false;