From 29afc1c53e3e6b058485e9c1fd91cfb062809b9f Mon Sep 17 00:00:00 2001 From: Lionel Landwerlin Date: Fri, 8 Mar 2024 15:56:50 +0200 Subject: [PATCH] intel/fs: fixup sampler header message If you look at the sampler message header on Gfx9+, you'll see that we mostly only use 2 dwords (dw2 & dw3). DW2 has a bunch of sampler parameters; DW3 is the sampler handle. On Gfx9 we can micro-optimize by copying r0 into the header because the HW mostly doesn't care about other DWs. We just have to clear dw2 on non-VS/FS stages. On Gfx11+, we always have to do a careful copy of the r0.3 bits to mask out the bottom unrelated bits. So there, just clearing the entire header makes more sense. On Xe2+, the dw4 of the header references the sampler feedback surface handle and bit0 is a boolean to know whether to use that surface or not. So it *REALLY* matters to have that as 0. If we copy r0, we'll get random bits in dw4, which can spuriously enable that surface. Signed-off-by: Lionel Landwerlin Cc: mesa-stable Reviewed-by: Rohan Garg Part-of: (cherry picked from commit 75c6ad99073ff4632955ae899057653902e6839f) --- .pick_status.json | 2 +- src/intel/compiler/brw_lower_logical_sends.cpp | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.pick_status.json b/.pick_status.json index 07660b01aa4..a8c5dae26bd 100644 --- a/.pick_status.json +++ b/.pick_status.json @@ -1344,7 +1344,7 @@ "description": "intel/fs: fixup sampler header message", "nominated": true, "nomination_type": 0, - "resolution": 0, + "resolution": 1, "main_sha": null, "because_sha": null, "notes": null diff --git a/src/intel/compiler/brw_lower_logical_sends.cpp b/src/intel/compiler/brw_lower_logical_sends.cpp index 1a784010bf8..472b15387f6 100644 --- a/src/intel/compiler/brw_lower_logical_sends.cpp +++ b/src/intel/compiler/brw_lower_logical_sends.cpp @@ -1009,10 +1009,14 @@ lower_sampler_logical_send_gfx7(const fs_builder &bld, fs_inst *inst, opcode op, /* Build the actual header */ const fs_builder ubld = 
bld.exec_all().group(8 * reg_unit(devinfo), 0); const fs_builder ubld1 = ubld.group(1, 0); - ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); + if (devinfo->ver >= 11) + ubld.MOV(header, brw_imm_ud(0)); + else + ubld.MOV(header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); if (inst->offset) { ubld1.MOV(component(header, 2), brw_imm_ud(inst->offset)); - } else if (bld.shader->stage != MESA_SHADER_VERTEX && + } else if (devinfo->ver < 11 && + bld.shader->stage != MESA_SHADER_VERTEX && bld.shader->stage != MESA_SHADER_FRAGMENT) { /* The vertex and fragment stages have g0.2 set to 0, so * header0.2 is 0 when g0 is copied. Other stages may not, so we