From e368b8e01bbc19c9b731468bbb73f86770a98ea2 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 12 Apr 2024 16:23:49 -0700 Subject: [PATCH] intel/brw/xe2+: Adjust size_read() for DPAS v2: Remove "DG2" from a comment because it applies to DG2 and Xe2. Suggested by Caio. Reviewed-by: Caio Oliveira Part-of: --- src/intel/compiler/brw_fs.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index fc3a493388f..66ef7f92324 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -936,25 +936,37 @@ fs_inst::size_read(int arg) const } break; - case BRW_OPCODE_DPAS: + case BRW_OPCODE_DPAS: { + /* This is a little bit sketchy. There's no way to get at devinfo from + * here, so the regular reg_unit() cannot be used. However, on + * reg_unit() == 1 platforms, DPAS exec_size must be 8, and on known + * reg_unit() == 2 platforms, DPAS exec_size must be 16. This is not a + * coincidence, so this isn't so bad. + */ + const unsigned reg_unit = this->exec_size / 8; + switch (arg) { case 0: if (src[0].type == BRW_TYPE_HF) { - return rcount * REG_SIZE / 2; + return rcount * reg_unit * REG_SIZE / 2; } else { - return rcount * REG_SIZE; + return rcount * reg_unit * REG_SIZE; } case 1: - return sdepth * REG_SIZE; + return sdepth * reg_unit * REG_SIZE; case 2: /* This is simpler than the formula described in the Bspec, but it - * covers all of the cases that we support on DG2. + * covers all of the cases that we support. Each inner sdepth + * iteration of the DPAS consumes a single dword for int8, uint8, or + * float16 types. These are the one source types currently + * supportable through Vulkan. This is independent of reg_unit. */ - return rcount * REG_SIZE; + return rcount * sdepth * 4; default: unreachable("Invalid source number."); } break; + } default: break;