intel/brw: Add scoreboard support for scalar register

Xe3 adds a new pipe that handles *only* MOVs from immediate into the
scalar register.

Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Lionel Landwerlin <None>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32410>
This commit is contained in:
Caio Oliveira 2024-11-20 09:12:22 -08:00 committed by Marge Bot
parent 3868102a04
commit 00fac79f99
3 changed files with 80 additions and 1 deletions

View file

@ -130,7 +130,16 @@ namespace {
return TGL_PIPE_NONE;
else if (devinfo->verx10 < 125)
return TGL_PIPE_FLOAT;
else if (inst->is_math() && devinfo->ver >= 20)
else if (devinfo->ver >= 30 &&
inst->exec_size == 1 &&
inst->dst.file == ARF &&
inst->dst.nr == BRW_ARF_SCALAR &&
inst->src[0].file == IMM) {
/* Scalar pipe has a very narrow usage. See Bspec 56701 (r60146),
* in the SWSB description entry.
*/
return TGL_PIPE_SCALAR;
} else if (inst->is_math() && devinfo->ver >= 20)
return TGL_PIPE_MATH;
else if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT ||
inst->opcode == SHADER_OPCODE_BROADCAST ||
@ -692,6 +701,7 @@ namespace {
sb.addr_dep = merge(eq, sb0.addr_dep, sb1.addr_dep);
sb.accum_dep = merge(eq, sb0.accum_dep, sb1.accum_dep);
sb.scalar_dep = merge(eq, sb0.scalar_dep, sb1.scalar_dep);
return sb;
}
@ -710,6 +720,7 @@ namespace {
sb.addr_dep = shadow(sb0.addr_dep, sb1.addr_dep);
sb.accum_dep = shadow(sb0.accum_dep, sb1.accum_dep);
sb.scalar_dep = shadow(sb0.scalar_dep, sb1.scalar_dep);
return sb;
}
@ -728,6 +739,7 @@ namespace {
sb.addr_dep = transport(sb0.addr_dep, delta);
sb.accum_dep = transport(sb0.accum_dep, delta);
sb.scalar_dep = transport(sb0.scalar_dep, delta);
return sb;
}
@ -746,6 +758,9 @@ namespace {
if (sb0.accum_dep != sb1.accum_dep)
return false;
if (sb0.scalar_dep != sb1.scalar_dep)
return false;
return true;
}
@ -759,6 +774,7 @@ namespace {
dependency grf_deps[XE3_MAX_GRF];
dependency addr_dep;
dependency accum_dep;
dependency scalar_dep;
dependency *
dep(const brw_reg &r)
@ -771,6 +787,8 @@ namespace {
reg < BRW_ARF_ACCUMULATOR ? &addr_dep :
r.file == ARF && reg >= BRW_ARF_ACCUMULATOR &&
reg < BRW_ARF_FLAG ? &accum_dep :
r.file == ARF && reg >= BRW_ARF_SCALAR &&
reg < BRW_ARF_STATE ? &scalar_dep :
NULL);
}
};

View file

@ -453,6 +453,9 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons
case BRW_ARF_FLAG:
fprintf(file, "f%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
break;
case BRW_ARF_SCALAR:
fprintf(file, "s0.%d", inst->dst.subnr);
break;
default:
fprintf(file, "arf%d.%d", inst->dst.nr & 0xf, inst->dst.subnr);
break;
@ -576,6 +579,9 @@ brw_print_instruction(const fs_visitor &s, const fs_inst *inst, FILE *file, cons
case BRW_ARF_FLAG:
fprintf(file, "f%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
break;
case BRW_ARF_SCALAR:
fprintf(file, "s0.%d", inst->src[i].subnr);
break;
default:
fprintf(file, "arf%d.%d", inst->src[i].nr & 0xf, inst->src[i].subnr);
break;

View file

@ -1182,3 +1182,58 @@ TEST_F(scoreboard_test, gfx200_cannot_embed_outoforder_dst_dependency_in_send_eo
EXPECT_EQ(instruction(block0, 2)->sched, tgl_swsb_null());
}
/* Test helper: build a brw_reg that refers to the ARF scalar register
 * (BRW_ARF_SCALAR) at sub-register `subnr`, using element type `type` and
 * a region described by `v`, `w` and `h`.
 *
 * The three region values are passed through cvt() before being handed to
 * brw_make_reg(); the width additionally has 1 subtracted after conversion.
 * NOTE(review): v/w/h presumably are the vertical stride, width and
 * horizontal stride of a <v;w,h> region -- confirm against brw_make_reg().
 */
static brw_reg
brw_s0_with_region(enum brw_reg_type type, unsigned subnr, unsigned v, unsigned w, unsigned h)
{
   const auto region_v = cvt(v);
   const auto region_w = cvt(w) - 1;
   const auto region_h = cvt(h);

   /* The two literal zeros are passed through unchanged from the original
    * call (presumably "no negate" / "no abs" -- confirm against the
    * brw_make_reg() prototype).
    */
   return brw_make_reg(ARF, BRW_ARF_SCALAR, subnr,
                       0, 0,
                       type,
                       region_v, region_w, region_h,
                       BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
}
/* With the device set to ver 30, a single-channel MOV of an immediate into
 * the scalar register followed by a read of that register is expected to
 * make the reader wait on the scalar pipe with a register distance of 1.
 */
TEST_F(scoreboard_test, scalar_register_mov_immediate_is_in_scalar_pipe)
{
   /* Switch the fixture's device to ver 30 and refresh the ISA info so the
    * new version is picked up.
    */
   devinfo->ver = 30;
   devinfo->verx10 = 300;
   brw_init_isa_info(&compiler->isa, devinfo);

   brw_reg s0 = brw_s0_with_region(BRW_TYPE_UW, 0, 0, 1, 0);

   /* Instruction 0: write an immediate into s0 (exec size 1).
    * Instruction 1: read s0 back into a GRF.
    */
   bld.group(1, 0).exec_all().MOV(s0, brw_imm_uw(0x1415));
   bld.MOV(brw_uw8_grf(20, 0), s0);

   brw_calculate_cfg(*v);
   lower_scoreboard(v);

   bblock_t *first_block = v->cfg->blocks[0];

   /* The producer has nothing to wait on. */
   auto producer = instruction(first_block, 0);
   EXPECT_EQ(producer->sched, tgl_swsb_null());

   /* The consumer must synchronize against the scalar pipe. */
   auto consumer = instruction(first_block, 1);
   EXPECT_EQ(consumer->sched, regdist(TGL_PIPE_SCALAR, 1));
}
/* Counterpart of the immediate case: with the device set to ver 30, a
 * single-channel MOV into the scalar register whose source is a GRF (not an
 * immediate) is expected to make the following reader wait on the integer
 * pipe, not the scalar pipe, with a register distance of 1.
 */
TEST_F(scoreboard_test, scalar_register_mov_grf_is_not_in_scalar_pipe)
{
   /* Switch the fixture's device to ver 30 and refresh the ISA info so the
    * new version is picked up.
    */
   devinfo->ver = 30;
   devinfo->verx10 = 300;
   brw_init_isa_info(&compiler->isa, devinfo);

   brw_reg s0 = brw_s0_with_region(BRW_TYPE_UW, 0, 0, 1, 0);

   /* Instruction 0: copy from a GRF into s0 (exec size 1).
    * Instruction 1: read s0 back into a different GRF.
    */
   bld.group(1, 0).exec_all().MOV(s0, brw_uw8_grf(0, 0));
   bld.MOV(brw_uw8_grf(20, 0), s0);

   brw_calculate_cfg(*v);
   lower_scoreboard(v);

   bblock_t *first_block = v->cfg->blocks[0];

   /* The producer has nothing to wait on. */
   auto producer = instruction(first_block, 0);
   EXPECT_EQ(producer->sched, tgl_swsb_null());

   /* The consumer must synchronize against the integer pipe. */
   auto consumer = instruction(first_block, 1);
   EXPECT_EQ(consumer->sched, regdist(TGL_PIPE_INT, 1));
}