From e8883bd40be96271551eceaeb8045284673f73fa Mon Sep 17 00:00:00 2001 From: Kenneth Graunke Date: Mon, 26 Aug 2024 15:34:21 -0700 Subject: [PATCH] intel/brw: Use size_written for NoMask instructions in is_partial_write The intention of inst->is_partial_write() is that it should return true when any REG_SIZE (32B) chunk of inst's destination is written but not fully overwritten. This can be used to tell whether inst combines new data with existing data, or screens off any previous writes, so the old values are no longer required. The existing (exec_size * brw_type_size_bytes(this->dst.type) < 32) check doesn't work in a number of cases. For example, LSC block loads have exec_size == 1 and force_writemask_all set, but may write multiple full registers of data. (Currently, we only see them with exec_size 1 after logical-send-lowering, so our SHADER_OPCODE_SEND special case was covering those.) We had also special-cased UNDEF. Instead, we can simply check: 1. Predication 2. !inst->dst.is_contiguous() 3. inst->dst.offset % REG_SIZE != 0 4. inst->size_written % REG_SIZE != 0 We had the first three already, but #4 is new. If either #3 or #4 is true, then that implies there is a REG_SIZE chunk of the destination which is written, but not entirely written, so it's a partial write. 
Reviewed-by: Lionel Landwerlin Reviewed-by: Caio Oliveira Reviewed-by: Rohan Garg Part-of: --- src/intel/compiler/brw_fs.cpp | 21 ++++----------------- 1 file changed, 4 insertions(+), 17 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 7e5219654e5..b60e494fb0c 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -617,26 +617,13 @@ fs_inst::is_partial_write() const this->opcode != BRW_OPCODE_SEL) return true; + if (!this->dst.is_contiguous()) + return true; + if (this->dst.offset % REG_SIZE != 0) return true; - /* SEND instructions always write whole registers */ - if (this->opcode == SHADER_OPCODE_SEND) - return false; - - /* Special case UNDEF since a lot of places in the backend do things like this : - * - * fs_builder ubld = bld.exec_all().group(1, 0); - * brw_reg tmp = ubld.vgrf(BRW_TYPE_UD); - * ubld.UNDEF(tmp); <- partial write, even if the whole register is concerned - */ - if (this->opcode == SHADER_OPCODE_UNDEF) { - assert(this->dst.is_contiguous()); - return this->size_written < 32; - } - - return this->exec_size * brw_type_size_bytes(this->dst.type) < 32 || - !this->dst.is_contiguous(); + return this->size_written % REG_SIZE != 0; } unsigned