mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 09:38:07 +02:00
intel/fs: Do 8-bit subgroup scan operations in 16 bits
Reviewed-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
This commit is contained in:
parent
651725f7a1
commit
03255da225
1 changed file with 39 additions and 3 deletions
|
|
@@ -4925,10 +4925,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
opcode brw_op = brw_op_for_nir_reduction_op(redop);
|
||||
brw_conditional_mod cond_mod = brw_cond_mod_for_nir_reduction_op(redop);
|
||||
|
||||
/* There are a couple of register region issues that make things
|
||||
* complicated for 8-bit types:
|
||||
*
|
||||
* 1. Only raw moves are allowed to write to a packed 8-bit
|
||||
* destination.
|
||||
* 2. If we use a strided destination, the efficient way to do scan
|
||||
* operations ends up using strides that are too big to encode in
|
||||
* an instruction.
|
||||
*
|
||||
* To get around these issues, we just do all 8-bit scan operations in
|
||||
* 16 bits. It's actually fewer instructions than what we'd have to do
|
||||
* if we were trying to do it in native 8-bit types and the results are
|
||||
* the same once we truncate to 8 bits at the end.
|
||||
*/
|
||||
brw_reg_type scan_type = src.type;
|
||||
if (type_sz(scan_type) == 1)
|
||||
scan_type = brw_reg_type_from_bit_size(16, src.type);
|
||||
|
||||
/* Set up a register for all of our scratching around and initialize it
|
||||
* to the reduction operation's identity value.
|
||||
*/
|
||||
fs_reg scan = bld.vgrf(src.type);
|
||||
fs_reg scan = bld.vgrf(scan_type);
|
||||
bld.exec_all().emit(SHADER_OPCODE_SEL_EXEC, scan, src, identity);
|
||||
|
||||
bld.emit_scan(brw_op, scan, cluster_size, cond_mod);
|
||||
|
|
@@ -4971,10 +4989,28 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
opcode brw_op = brw_op_for_nir_reduction_op(redop);
|
||||
brw_conditional_mod cond_mod = brw_cond_mod_for_nir_reduction_op(redop);
|
||||
|
||||
/* There are a couple of register region issues that make things
|
||||
* complicated for 8-bit types:
|
||||
*
|
||||
* 1. Only raw moves are allowed to write to a packed 8-bit
|
||||
* destination.
|
||||
* 2. If we use a strided destination, the efficient way to do scan
|
||||
* operations ends up using strides that are too big to encode in
|
||||
* an instruction.
|
||||
*
|
||||
* To get around these issues, we just do all 8-bit scan operations in
|
||||
* 16 bits. It's actually fewer instructions than what we'd have to do
|
||||
* if we were trying to do it in native 8-bit types and the results are
|
||||
* the same once we truncate to 8 bits at the end.
|
||||
*/
|
||||
brw_reg_type scan_type = src.type;
|
||||
if (type_sz(scan_type) == 1)
|
||||
scan_type = brw_reg_type_from_bit_size(16, src.type);
|
||||
|
||||
/* Set up a register for all of our scratching around and initialize it
|
||||
* to the reduction operation's identity value.
|
||||
*/
|
||||
fs_reg scan = bld.vgrf(src.type);
|
||||
fs_reg scan = bld.vgrf(scan_type);
|
||||
const fs_builder allbld = bld.exec_all();
|
||||
allbld.emit(SHADER_OPCODE_SEL_EXEC, scan, src, identity);
|
||||
|
||||
|
|
@@ -4983,7 +5019,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
|||
* shift of the contents before we can begin. To make things worse,
|
||||
* we can't do this with a normal stride; we have to use indirects.
|
||||
*/
|
||||
fs_reg shifted = bld.vgrf(src.type);
|
||||
fs_reg shifted = bld.vgrf(scan_type);
|
||||
fs_reg idx = bld.vgrf(BRW_REGISTER_TYPE_W);
|
||||
allbld.ADD(idx, nir_system_values[SYSTEM_VALUE_SUBGROUP_INVOCATION],
|
||||
brw_imm_w(-1));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue