mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-20 11:40:10 +01:00
intel/elk/chv: Implement WaClearArfDependenciesBeforeEot
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31746>
This commit is contained in:
parent
2a4efe21c5
commit
ebd6738260
2 changed files with 101 additions and 0 deletions
|
|
@ -3318,6 +3318,94 @@ elk_fs_visitor::insert_gfx4_send_dependency_workarounds()
|
||||||
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* flags_read() and flags_written() return flag access with byte granularity,
|
||||||
|
* but for Flag Register PRM lists "Access Granularity: Word", so we can assume
|
||||||
|
* accessing any part of a word will clear its register dependency.
|
||||||
|
*/
|
||||||
|
static unsigned
|
||||||
|
bytes_bitmask_to_words(unsigned b)
|
||||||
|
{
|
||||||
|
unsigned first_byte_mask = b & 0x55555555;
|
||||||
|
unsigned second_byte_mask = b & 0xaaaaaaaa;
|
||||||
|
return first_byte_mask |
|
||||||
|
(first_byte_mask << 1) |
|
||||||
|
second_byte_mask |
|
||||||
|
(second_byte_mask >> 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* WaClearArfDependenciesBeforeEot
|
||||||
|
*
|
||||||
|
* Flag register dependency not cleared after EOT, so we have to source them
|
||||||
|
* before EOT. We can do this with simple `mov(1) nullUD, f{0,1}UD`
|
||||||
|
*
|
||||||
|
* To avoid emitting MOVs when it's not needed, check if each block reads all
|
||||||
|
* the flags it sets. We might falsely determine register as unread if it'll be
|
||||||
|
* accessed inside the next blocks, but this still should be good enough.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
elk_fs_visitor::workaround_source_arf_before_eot()
|
||||||
|
{
|
||||||
|
bool progress = false;
|
||||||
|
|
||||||
|
if (devinfo->platform != INTEL_PLATFORM_CHV)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
unsigned flags_unread = 0;
|
||||||
|
|
||||||
|
foreach_block(block, cfg) {
|
||||||
|
unsigned flags_unread_in_block = 0;
|
||||||
|
|
||||||
|
foreach_inst_in_block(elk_fs_inst, inst, block) {
|
||||||
|
/* Instruction can read and write to the same flag, so the order is important */
|
||||||
|
flags_unread_in_block &= ~bytes_bitmask_to_words(inst->flags_read(devinfo));
|
||||||
|
flags_unread_in_block |= bytes_bitmask_to_words(inst->flags_written(devinfo));
|
||||||
|
|
||||||
|
/* HALT does not start its block even though it can leave a dependency */
|
||||||
|
if (inst->opcode == ELK_OPCODE_HALT ||
|
||||||
|
inst->opcode == ELK_SHADER_OPCODE_HALT_TARGET) {
|
||||||
|
flags_unread |= flags_unread_in_block;
|
||||||
|
flags_unread_in_block = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
flags_unread |= flags_unread_in_block;
|
||||||
|
|
||||||
|
if ((flags_unread & 0x0f) && (flags_unread & 0xf0))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (flags_unread) {
|
||||||
|
int eot_count = 0;
|
||||||
|
|
||||||
|
foreach_block_and_inst_safe(block, elk_fs_inst, inst, cfg)
|
||||||
|
{
|
||||||
|
if (!inst->eot)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Currently, we always emit only one EOT per program,
|
||||||
|
* this WA should be updated if it ever changes.
|
||||||
|
*/
|
||||||
|
assert(++eot_count == 1);
|
||||||
|
|
||||||
|
const fs_builder ibld(this, block, inst);
|
||||||
|
const fs_builder ubld = ibld.exec_all().group(1, 0);
|
||||||
|
|
||||||
|
if (flags_unread & 0x0f)
|
||||||
|
ubld.MOV(ubld.null_reg_ud(), retype(elk_flag_reg(0, 0), ELK_REGISTER_TYPE_UD));
|
||||||
|
|
||||||
|
if (flags_unread & 0xf0)
|
||||||
|
ubld.MOV(ubld.null_reg_ud(), retype(elk_flag_reg(1, 0), ELK_REGISTER_TYPE_UD));
|
||||||
|
}
|
||||||
|
|
||||||
|
progress = true;
|
||||||
|
invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
|
||||||
|
}
|
||||||
|
|
||||||
|
return progress;
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
elk_fs_visitor::lower_load_payload()
|
elk_fs_visitor::lower_load_payload()
|
||||||
{
|
{
|
||||||
|
|
@ -5945,6 +6033,8 @@ elk_fs_visitor::run_vs()
|
||||||
|
|
||||||
allocate_registers(true /* allow_spilling */);
|
allocate_registers(true /* allow_spilling */);
|
||||||
|
|
||||||
|
workaround_source_arf_before_eot();
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -6062,6 +6152,8 @@ elk_fs_visitor::run_tcs()
|
||||||
|
|
||||||
allocate_registers(true /* allow_spilling */);
|
allocate_registers(true /* allow_spilling */);
|
||||||
|
|
||||||
|
workaround_source_arf_before_eot();
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -6090,6 +6182,8 @@ elk_fs_visitor::run_tes()
|
||||||
|
|
||||||
allocate_registers(true /* allow_spilling */);
|
allocate_registers(true /* allow_spilling */);
|
||||||
|
|
||||||
|
workaround_source_arf_before_eot();
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -6135,6 +6229,8 @@ elk_fs_visitor::run_gs()
|
||||||
|
|
||||||
allocate_registers(true /* allow_spilling */);
|
allocate_registers(true /* allow_spilling */);
|
||||||
|
|
||||||
|
workaround_source_arf_before_eot();
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -6206,6 +6302,8 @@ elk_fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
|
||||||
fixup_3src_null_dest();
|
fixup_3src_null_dest();
|
||||||
|
|
||||||
allocate_registers(allow_spilling);
|
allocate_registers(allow_spilling);
|
||||||
|
|
||||||
|
workaround_source_arf_before_eot();
|
||||||
}
|
}
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
|
|
@ -6244,6 +6342,8 @@ elk_fs_visitor::run_cs(bool allow_spilling)
|
||||||
|
|
||||||
allocate_registers(allow_spilling);
|
allocate_registers(allow_spilling);
|
||||||
|
|
||||||
|
workaround_source_arf_before_eot();
|
||||||
|
|
||||||
return !failed;
|
return !failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -266,6 +266,7 @@ public:
|
||||||
elk_fs_inst *inst);
|
elk_fs_inst *inst);
|
||||||
void insert_gfx4_post_send_dependency_workarounds(elk_bblock_t *block,
|
void insert_gfx4_post_send_dependency_workarounds(elk_bblock_t *block,
|
||||||
elk_fs_inst *inst);
|
elk_fs_inst *inst);
|
||||||
|
bool workaround_source_arf_before_eot();
|
||||||
void vfail(const char *msg, va_list args);
|
void vfail(const char *msg, va_list args);
|
||||||
void fail(const char *msg, ...);
|
void fail(const char *msg, ...);
|
||||||
void limit_dispatch_width(unsigned n, const char *msg);
|
void limit_dispatch_width(unsigned n, const char *msg);
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue