mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-25 17:20:10 +01:00
intel/brw: Add lowering for SHADER_OPCODE_SEND_GATHER
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Lionel Landwerlin <None> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32410>
This commit is contained in:
parent
650ec7169d
commit
26d4d04d63
3 changed files with 92 additions and 0 deletions
|
|
@ -1466,6 +1466,11 @@ brw_allocate_registers(fs_visitor &s, bool allow_spilling)
|
|||
|
||||
s.debug_optimizer(nir, "lowered_vgrfs_to_fixed_grfs", 96, pass_num++);
|
||||
|
||||
if (s.devinfo->ver >= 30) {
|
||||
brw_lower_send_gather(s);
|
||||
s.debug_optimizer(nir, "lower_send_gather", 96, pass_num++);
|
||||
}
|
||||
|
||||
brw_shader_phase_update(s, BRW_SHADER_PHASE_AFTER_REGALLOC);
|
||||
|
||||
if (s.last_scratch > 0) {
|
||||
|
|
|
|||
|
|
@ -533,6 +533,7 @@ bool brw_lower_regioning(fs_visitor &s);
|
|||
bool brw_lower_scalar_fp64_MAD(fs_visitor &s);
|
||||
bool brw_lower_scoreboard(fs_visitor &s);
|
||||
bool brw_lower_send_descriptors(fs_visitor &s);
|
||||
bool brw_lower_send_gather(fs_visitor &s);
|
||||
bool brw_lower_sends_overlapping_payload(fs_visitor &s);
|
||||
bool brw_lower_simd_width(fs_visitor &s);
|
||||
bool brw_lower_src_modifiers(fs_visitor &s, bblock_t *block, fs_inst *inst, unsigned i);
|
||||
|
|
|
|||
|
|
@ -757,6 +757,92 @@ brw_lower_vgrfs_to_fixed_grfs(fs_visitor &s)
|
|||
DEPENDENCY_VARIABLES);
|
||||
}
|
||||
|
||||
static brw_reg
|
||||
brw_s0(enum brw_reg_type type, unsigned subnr)
|
||||
{
|
||||
return brw_make_reg(ARF,
|
||||
BRW_ARF_SCALAR,
|
||||
subnr,
|
||||
0,
|
||||
0,
|
||||
type,
|
||||
BRW_VERTICAL_STRIDE_0,
|
||||
BRW_WIDTH_1,
|
||||
BRW_HORIZONTAL_STRIDE_0,
|
||||
BRW_SWIZZLE_XYZW,
|
||||
WRITEMASK_XYZW);
|
||||
}
|
||||
|
||||
static bool
|
||||
brw_lower_send_gather_inst(fs_visitor &s, bblock_t *block, fs_inst *inst)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
assert(devinfo->ver >= 30);
|
||||
|
||||
const unsigned unit = reg_unit(devinfo);
|
||||
assert(unit == 2);
|
||||
|
||||
assert(inst->opcode == SHADER_OPCODE_SEND_GATHER);
|
||||
assert(inst->sources > 2);
|
||||
assert(inst->src[2].file == BAD_FILE);
|
||||
|
||||
unsigned count = 0;
|
||||
uint8_t regs[16] = {};
|
||||
|
||||
const unsigned num_payload_sources = inst->sources - 3;
|
||||
assert(num_payload_sources > 0);
|
||||
|
||||
/* Limited by Src0.Length in the SEND instruction. */
|
||||
assert(num_payload_sources < 16);
|
||||
|
||||
for (unsigned i = 3; i < inst->sources; i++) {
|
||||
assert(inst->src[i].file == FIXED_GRF);
|
||||
assert(inst->src[i].nr % reg_unit(devinfo) == 0);
|
||||
|
||||
unsigned nr = phys_nr(devinfo, inst->src[i]);
|
||||
assert(nr <= UINT8_MAX);
|
||||
regs[count++] = nr;
|
||||
}
|
||||
|
||||
/* Fill out ARF scalar register with the physical register numbers
|
||||
* and use SEND_GATHER.
|
||||
*/
|
||||
brw_builder ubld = brw_builder(&s, block, inst).group(1, 0).exec_all();
|
||||
for (unsigned q = 0; q < DIV_ROUND_UP(count, 8); q++) {
|
||||
uint64_t v = 0;
|
||||
for (unsigned i = 0; i < 8; i++) {
|
||||
const uint64_t reg = regs[(q * 8) + i];
|
||||
v |= reg << (8 * i);
|
||||
}
|
||||
ubld.MOV(brw_s0(BRW_TYPE_UQ, q), brw_imm_uq(v));
|
||||
}
|
||||
|
||||
inst->src[2] = brw_s0(BRW_TYPE_UD, 0);
|
||||
inst->mlen = count * unit;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_lower_send_gather(fs_visitor &s)
|
||||
{
|
||||
assert(s.devinfo->ver >= 30);
|
||||
assert(s.grf_used || !"Must be called after register allocation");
|
||||
|
||||
bool progress = false;
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, inst, s.cfg) {
|
||||
if (inst->opcode == SHADER_OPCODE_SEND_GATHER)
|
||||
progress |= brw_lower_send_gather_inst(s, block, inst);
|
||||
}
|
||||
|
||||
if (progress)
|
||||
s.invalidate_analysis(DEPENDENCY_INSTRUCTION_DATA_FLOW |
|
||||
DEPENDENCY_VARIABLES);
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_lower_load_subgroup_invocation(fs_visitor &s)
|
||||
{
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue