From e3018177537014078c03e07dfce47d24bc91a1bd Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Thu, 23 Apr 2026 19:47:00 -0700
Subject: [PATCH] brw: Don't lower phis involved in DPAS instructions to scalar

On my Arc A380 (DG2), this more than doubles the performance of Jeff
Bolz's cooperative matrix benchmark. With llama.cpp modified to use
cooperative matrix on DG2, performance is improved by 37%.

Closes: #15311
Reviewed-by: Kenneth Graunke
Tested-by: Matt Corallo
Part-of:
---
 src/intel/compiler/brw/brw_nir.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/src/intel/compiler/brw/brw_nir.c b/src/intel/compiler/brw/brw_nir.c
index d9189315a67..d89383f1a7e 100644
--- a/src/intel/compiler/brw/brw_nir.c
+++ b/src/intel/compiler/brw/brw_nir.c
@@ -1701,6 +1701,33 @@ brw_nir_tag_speculative_access(nir_shader *nir)
 static uint8_t
 brw_nir_lower_phis_to_scalar_cb(const nir_instr *instr, const void *_)
 {
+   nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+   /* Return 0 (do not scalarize) when the phi is connected to a DPAS on
+    * either side; otherwise fall through to the default result of 1.
+    *
+    * Producer side: a phi source is the result of a DPAS.
+    */
+   nir_foreach_phi_src(src, phi) {
+      const nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src->src);
+
+      if (intrin != NULL && intrin->intrinsic == nir_intrinsic_dpas_intel)
+         return 0;
+   }
+
+   /* Consumer side: the phi result is read by a DPAS.  The check must look
+    * at the instruction that owns the use.  nir_src_as_intrinsic(*use_src)
+    * would instead examine the producer of the value, which for a use of
+    * phi->def is the phi itself and so never matches.
+    */
+   nir_foreach_use(use_src, &phi->def) {
+      const nir_instr *user = nir_src_parent_instr(use_src);
+
+      if (user->type == nir_instr_type_intrinsic &&
+          nir_instr_as_intrinsic(user)->intrinsic == nir_intrinsic_dpas_intel)
+         return 0;
+   }
+
    return 1;
 }
 