brw: use a single virtual opcode to read ARF registers

In 2c65d90bc8 I forgot to add the new SHADER_OPCODE_READ_MASK_REG
opcode to the list of barrier instructions in the scheduler. Let's just
use a single opcode for all ARF registers that need special
scoreboarding and pass the register as the source (nicer for the debug
output).

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 2c65d90bc8 ("intel/brw: ensure find_live_channel don't access arch register without sync")
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29446>
This commit is contained in:
Lionel Landwerlin 2024-05-28 16:43:43 +03:00 committed by Marge Bot
parent 588c725f27
commit d8b78924c5
8 changed files with 19 additions and 37 deletions

View file

@ -536,8 +536,7 @@ enum opcode {
SHADER_OPCODE_BTD_SPAWN_LOGICAL,
SHADER_OPCODE_BTD_RETIRE_LOGICAL,
SHADER_OPCODE_READ_MASK_REG,
SHADER_OPCODE_READ_SR_REG,
SHADER_OPCODE_READ_ARCH_REG,
RT_OPCODE_TRACE_RAY_LOGICAL,
};

View file

@ -2464,10 +2464,8 @@ brw_instruction_name(const struct brw_isa_info *isa, enum opcode op)
return "btd_spawn_logical";
case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
return "btd_retire_logical";
case SHADER_OPCODE_READ_MASK_REG:
return "read_mask_reg";
case SHADER_OPCODE_READ_SR_REG:
return "read_sr_reg";
case SHADER_OPCODE_READ_ARCH_REG:
return "read_arch_reg";
}
unreachable("not reached");

View file

@ -1329,7 +1329,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
brw_float_controls_mode(p, src[0].d, src[1].d);
break;
case SHADER_OPCODE_READ_MASK_REG:
case SHADER_OPCODE_READ_ARCH_REG:
if (devinfo->ver >= 12) {
/* There is a SWSB restriction that requires that any time sr0 is
* accessed both the instruction doing the access and the next one
@ -1337,33 +1337,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
*/
if (brw_get_default_swsb(p).mode != TGL_SBID_NULL)
brw_SYNC(p, TGL_SYNC_NOP);
assert(src[0].file == BRW_IMMEDIATE_VALUE);
brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud),
BRW_TYPE_UD));
brw_MOV(p, dst, src[0]);
brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_AND(p, dst, dst, brw_imm_ud(0xffffffff));
} else {
brw_MOV(p, dst, retype(brw_mask_reg(src[0].ud),
BRW_TYPE_UD));
}
break;
case SHADER_OPCODE_READ_SR_REG:
if (devinfo->ver >= 12) {
/* There is a SWSB restriction that requires that any time sr0 is
* accessed both the instruction doing the access and the next one
* have SWSB set to RegDist(1).
*/
if (brw_get_default_swsb(p).mode != TGL_SBID_NULL)
brw_SYNC(p, TGL_SYNC_NOP);
assert(src[0].file == BRW_IMMEDIATE_VALUE);
brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_MOV(p, dst, brw_sr0_reg(src[0].ud));
brw_set_default_swsb(p, tgl_swsb_regdist(1));
brw_AND(p, dst, dst, brw_imm_ud(0xffffffff));
} else {
brw_MOV(p, dst, brw_sr0_reg(src[0].ud));
brw_MOV(p, dst, src[0]);
}
break;

View file

@ -390,7 +390,9 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
fs_reg exec_mask = ubld.vgrf(BRW_TYPE_UD);
ubld.UNDEF(exec_mask);
ubld.emit(SHADER_OPCODE_READ_MASK_REG, exec_mask, brw_imm_ud(0));
ubld.emit(SHADER_OPCODE_READ_ARCH_REG, exec_mask,
retype(brw_mask_reg(0),
BRW_TYPE_UD));
/* ce0 doesn't consider the thread dispatch mask (DMask or VMask),
* so combine the execution and dispatch masks to obtain the true mask.
@ -402,7 +404,9 @@ brw_fs_lower_find_live_channel(fs_visitor &s)
if (!(first && packed_dispatch)) {
fs_reg mask = ubld.vgrf(BRW_TYPE_UD);
ubld.UNDEF(mask);
ubld.emit(SHADER_OPCODE_READ_SR_REG, mask, brw_imm_ud(vmask ? 3 : 2));
ubld.emit(SHADER_OPCODE_READ_ARCH_REG, mask,
retype(brw_sr0_reg(vmask ? 3 : 2),
BRW_TYPE_UD));
/* Quarter control has the effect of magically shifting the value of
* ce0 so you'll get the first/last active channel relative to the

View file

@ -7442,7 +7442,9 @@ fs_nir_emit_intrinsic(nir_to_brw_state &ntb,
* [2:0] : Thread ID
*/
fs_reg raw_id = bld.vgrf(BRW_TYPE_UD);
bld.emit(SHADER_OPCODE_READ_SR_REG, raw_id, brw_imm_ud(0));
bld.UNDEF(raw_id);
bld.emit(SHADER_OPCODE_READ_ARCH_REG, raw_id, retype(brw_sr0_reg(0),
BRW_TYPE_UD));
switch (nir_intrinsic_base(instr)) {
case BRW_TOPOLOGY_ID_DSS:
if (devinfo->ver >= 20) {

View file

@ -312,8 +312,7 @@ namespace {
case FS_OPCODE_DDY_COARSE:
case FS_OPCODE_PIXEL_X:
case FS_OPCODE_PIXEL_Y:
case SHADER_OPCODE_READ_MASK_REG:
case SHADER_OPCODE_READ_SR_REG:
case SHADER_OPCODE_READ_ARCH_REG:
if (devinfo->ver >= 11) {
return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,
0, 10, 6 /* XXX */, 14, 0, 0);

View file

@ -1308,7 +1308,8 @@ emit_predicate_on_vector_mask(const fs_builder &bld, fs_inst *inst)
const fs_visitor &s = *bld.shader;
const fs_reg vector_mask = ubld.vgrf(BRW_TYPE_UW);
ubld.UNDEF(vector_mask);
ubld.emit(SHADER_OPCODE_READ_SR_REG, vector_mask, brw_imm_ud(3));
ubld.emit(SHADER_OPCODE_READ_ARCH_REG, vector_mask, retype(brw_sr0_reg(3),
BRW_TYPE_UD));
const unsigned subreg = sample_mask_flag_subreg(s);
ubld.MOV(brw_flag_subreg(subreg + inst->group / 16), vector_mask);

View file

@ -1048,7 +1048,7 @@ has_cross_lane_access(const fs_inst *inst)
* accesses.
*/
if (inst->opcode == SHADER_OPCODE_BROADCAST ||
inst->opcode == SHADER_OPCODE_READ_SR_REG ||
inst->opcode == SHADER_OPCODE_READ_ARCH_REG ||
inst->opcode == SHADER_OPCODE_CLUSTER_BROADCAST ||
inst->opcode == SHADER_OPCODE_SHUFFLE ||
inst->opcode == FS_OPCODE_LOAD_LIVE_CHANNELS ||