From b2ee98d2dbde3042d9ba40c741a95c511f3a3cd0 Mon Sep 17 00:00:00 2001
From: Caio Oliveira
Date: Tue, 19 Mar 2024 11:16:18 -0700
Subject: [PATCH] intel/brw: Handle Xe2 in brw_fs_opt_zero_samples

The mlen tracking is in REG_SIZE units, but in Xe2 each GRF has doubled
the size. The optimization can only elide full GRFs, so round down the
amount of trailing zeros to ensure the optimization will remove only
full GRFs.

Reviewed-by: Kenneth Graunke
Part-of:
---
 src/intel/compiler/brw_fs_opt.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/intel/compiler/brw_fs_opt.cpp b/src/intel/compiler/brw_fs_opt.cpp
index e9abeef4098..ab4679f4416 100644
--- a/src/intel/compiler/brw_fs_opt.cpp
+++ b/src/intel/compiler/brw_fs_opt.cpp
@@ -232,8 +232,10 @@ brw_fs_opt_zero_samples(fs_visitor &s)
          zero_size += lp->exec_size * type_sz(lp->src[i].type) * lp->dst.stride;
       }
 
-      const unsigned zero_len = zero_size / (reg_unit(s.devinfo) * REG_SIZE);
+      /* Round down to ensure to only consider full registers. */
+      const unsigned zero_len = ROUND_DOWN_TO(zero_size / REG_SIZE, reg_unit(s.devinfo));
       if (zero_len > 0) {
+         /* Note mlen is in REG_SIZE units. */
          send->mlen -= zero_len;
          progress = true;
       }