brw: fix mask component computation for 16-bit sampler returns

We can't derive the number of written components from register counts,
since 16-bit sampler loads in SIMD8 only write back half a GRF.

Signed-off-by: Lionel Landwerlin <llandwerlin@gmail.com>
Fixes: 0116430d39 ("intel/brw: Handle 16-bit sampler return payloads")
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Acked-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31307>
This commit is contained in:
Lionel Landwerlin 2024-09-23 10:51:57 +03:00 committed by Marge Bot
parent eeb5f6e8c8
commit 2ed4af057a

View file

@ -759,7 +759,6 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
brw_type_with_size(BRW_TYPE_UD, payload_type_bit_size);
const enum brw_reg_type payload_signed_type =
brw_type_with_size(BRW_TYPE_D, payload_type_bit_size);
unsigned reg_width = bld.dispatch_width() / 8;
unsigned header_size = 0, length = 0;
opcode op = inst->opcode;
brw_reg sources[1 + MAX_SAMPLER_MESSAGE_SIZE];
@ -790,10 +789,15 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
* and we have an explicit header, we need to set up the sampler
* writemask. It's reversed from normal: 1 means "don't write".
*/
unsigned reg_count = regs_written(inst) - reg_unit(devinfo) * residency;
if (!inst->eot && reg_count < 4 * reg_width) {
assert(reg_count % reg_width == 0);
unsigned mask = ~((1 << (reg_count / reg_width)) - 1) & 0xf;
unsigned comps_regs =
DIV_ROUND_UP(regs_written(inst) - reg_unit(devinfo) * residency,
reg_unit(devinfo));
unsigned comp_regs =
DIV_ROUND_UP(inst->dst.component_size(inst->exec_size),
reg_unit(devinfo) * REG_SIZE);
if (!inst->eot && comps_regs < 4 * comp_regs) {
assert(comps_regs % comp_regs == 0);
unsigned mask = ~((1 << (comps_regs / comp_regs)) - 1) & 0xf;
inst->offset |= mask << 12;
}
@ -1088,7 +1092,7 @@ lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst,
}
const brw_reg src_payload =
brw_vgrf(bld.shader->alloc.allocate(length * reg_width),
brw_vgrf(bld.shader->alloc.allocate(length * bld.dispatch_width() / 8),
BRW_TYPE_F);
/* In case of 16-bit payload each component takes one full register in
* both SIMD8H and SIMD16H modes. In both cases one reg can hold 16