mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-30 23:00:11 +01:00
brw/lower: Adjust source stride on DF is_scalar sources to MAD on Gfx9
This commit used to be "brw/emit: Allow scalar sources to 64-bit 3-source instructions". These instructions were fixed up in brw_eu_emit. There seems to be some conflict with the <0,1,0> stride and post-RA scheduling. The only difference between the passing code generated by this commit and the failing code generated by the older commit is some post-RA scheduling. v2: Change the stride of a MAD even if the instruction isn't lowered. MAD instructions that are already SIMD8 have to follow the same rules. 🤦 v3: Pull the lowering out to its own pass. Update the comment in brw_fs_validate. Suggested by Ken. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29884>
This commit is contained in:
parent
d5d7ae22ae
commit
007c92b2ac
4 changed files with 49 additions and 1 deletions
|
|
@ -641,6 +641,7 @@ bool brw_fs_lower_logical_sends(fs_visitor &s);
|
|||
bool brw_fs_lower_pack(fs_visitor &s);
|
||||
bool brw_fs_lower_load_payload(fs_visitor &s);
|
||||
bool brw_fs_lower_regioning(fs_visitor &s);
|
||||
bool brw_lower_scalar_fp64_MAD(fs_visitor &s);
|
||||
bool brw_fs_lower_scoreboard(fs_visitor &s);
|
||||
bool brw_fs_lower_sends_overlapping_payload(fs_visitor &s);
|
||||
bool brw_fs_lower_simd_width(fs_visitor &s);
|
||||
|
|
|
|||
|
|
@ -8,6 +8,47 @@
|
|||
|
||||
using namespace brw;
|
||||
|
||||
/**
|
||||
* Align16 3-source instructions cannot have scalar stride w/64-bit types.
|
||||
*
|
||||
* The Bspec says:
|
||||
*
|
||||
* Replicate Control. This field is only present in three-source
|
||||
* instructions, for each of the three source operands. It controls
|
||||
* replication of the starting channel to all channels in the execution
|
||||
* size. ChanSel does not apply when Replicate Control is set. This is
|
||||
* applicable to 32b datatypes and 16b datatype. 64b datatypes cannot use
|
||||
* the replicate control.
|
||||
*
|
||||
* In practice, this can only happen on Gfx9 with DF sources to MAD. Since
|
||||
* the source is_scalar, this can be fixed by just making the stride=1. Also
|
||||
* clear is_scalar "just in case."
|
||||
*/
|
||||
bool
|
||||
brw_lower_scalar_fp64_MAD(fs_visitor &s)
|
||||
{
|
||||
const intel_device_info *devinfo = s.devinfo;
|
||||
bool progress = false;
|
||||
|
||||
if (devinfo->ver != 9)
|
||||
return false;
|
||||
|
||||
foreach_block_and_inst_safe(block, fs_inst, inst, s.cfg) {
|
||||
if (inst->opcode == BRW_OPCODE_MAD &&
|
||||
inst->dst.type == BRW_TYPE_DF) {
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
if (inst->src[i].is_scalar) {
|
||||
inst->src[i].is_scalar = false;
|
||||
inst->src[i].stride = 1;
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Replace UNIFORM register file access with either UNIFORM_PULL_CONSTANT_LOAD
|
||||
* or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
|
||||
|
|
|
|||
|
|
@ -96,6 +96,7 @@ brw_fs_optimize(fs_visitor &s)
|
|||
OPT(brw_fs_lower_subgroup_ops);
|
||||
OPT(brw_fs_lower_csel);
|
||||
OPT(brw_fs_lower_simd_width);
|
||||
OPT(brw_lower_scalar_fp64_MAD);
|
||||
OPT(brw_fs_lower_barycentrics);
|
||||
OPT(brw_fs_lower_logical_sends);
|
||||
|
||||
|
|
|
|||
|
|
@ -366,7 +366,12 @@ brw_fs_validate(const fs_visitor &s)
|
|||
const unsigned stride_in_bytes = byte_stride(inst->src[i]);
|
||||
const unsigned size_in_bytes = brw_type_size_bytes(inst->src[i].type);
|
||||
if (stride_in_bytes == 0) {
|
||||
fsv_assert_lte(size_in_bytes, 4);
|
||||
/* If the source is_scalar, then the stride will be
|
||||
* converted to <4;4,1> in brw_lower_scalar_fp64_MAD after
|
||||
* SIMD splitting.
|
||||
*/
|
||||
if (!inst->src[i].is_scalar)
|
||||
fsv_assert_lte(size_in_bytes, 4);
|
||||
} else {
|
||||
fsv_assert_eq(stride_in_bytes, size_in_bytes);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue