mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-06 02:20:11 +01:00
aco: more carefully apply constant offsets into scratch accesses
Death stranding does scratch_arr[80-idx]. This doesn't seem to work if we
try to combine the subtraction into the access.
fossil-db (navi21):
Totals from 52 (0.04% of 135636) affected shaders:
Instrs: 78560 -> 79036 (+0.61%)
CodeSize: 427940 -> 431188 (+0.76%)
Latency: 1313809 -> 1318142 (+0.33%)
InvThroughput: 292833 -> 293842 (+0.34%)
VClause: 2361 -> 2555 (+8.22%); split: -0.51%, +8.73%
Copies: 8767 -> 8746 (-0.24%); split: -0.35%, +0.11%
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Fixes: 0e783d687a ("aco: use scratch_* for scratch load/store on GFX9+")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/7735
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/20117>
This commit is contained in:
parent
fae8c75ad0
commit
381de3c809
1 changed files with 19 additions and 4 deletions
|
|
@ -1255,12 +1255,14 @@ is_op_canonicalized(opt_ctx& ctx, Operand op)
|
|||
}
|
||||
|
||||
bool
|
||||
is_scratch_offset_valid(opt_ctx& ctx, Instruction* instr, int32_t offset)
|
||||
is_scratch_offset_valid(opt_ctx& ctx, Instruction* instr, int64_t offset0, int64_t offset1)
|
||||
{
|
||||
bool negative_unaligned_scratch_offset_bug = ctx.program->gfx_level == GFX10;
|
||||
int32_t min = ctx.program->dev.scratch_global_offset_min;
|
||||
int32_t max = ctx.program->dev.scratch_global_offset_max;
|
||||
|
||||
int64_t offset = offset0 + offset1;
|
||||
|
||||
bool has_vgpr_offset = instr && !instr->operands[0].isUndefined();
|
||||
if (negative_unaligned_scratch_offset_bug && has_vgpr_offset && offset < 0 && offset % 4)
|
||||
return false;
|
||||
|
|
@ -1467,15 +1469,28 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
|||
while (info.is_temp())
|
||||
info = ctx.info[info.temp.id()];
|
||||
|
||||
if (i <= 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
|
||||
/* The hardware probably does: 'scratch_base + u2u64(saddr) + i2i64(offset)'. This means
|
||||
* we can't combine the addition if the unsigned addition overflows and offset is
|
||||
* positive. In theory, there is also issues if
|
||||
* 'ilt(offset, 0) && ige(saddr, 0) && ilt(saddr + offset, 0)', but that just
|
||||
* replaces an already out-of-bounds access with a larger one since 'saddr + offset'
|
||||
* would be larger than INT32_MAX.
|
||||
*/
|
||||
if (i <= 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, true) &&
|
||||
base.regClass() == instr->operands[i].regClass() &&
|
||||
is_scratch_offset_valid(ctx, instr.get(), scratch.offset + (int32_t)offset)) {
|
||||
is_scratch_offset_valid(ctx, instr.get(), scratch.offset, (int32_t)offset)) {
|
||||
instr->operands[i].setTemp(base);
|
||||
scratch.offset += (int32_t)offset;
|
||||
continue;
|
||||
} else if (i <= 1 && parse_base_offset(ctx, instr.get(), i, &base, &offset, false) &&
|
||||
base.regClass() == instr->operands[i].regClass() && (int32_t)offset < 0 &&
|
||||
is_scratch_offset_valid(ctx, instr.get(), scratch.offset, (int32_t)offset)) {
|
||||
instr->operands[i].setTemp(base);
|
||||
scratch.offset += (int32_t)offset;
|
||||
continue;
|
||||
} else if (i <= 1 && info.is_constant_or_literal(32) &&
|
||||
ctx.program->gfx_level >= GFX10_3 &&
|
||||
is_scratch_offset_valid(ctx, NULL, scratch.offset + (int32_t)info.val)) {
|
||||
is_scratch_offset_valid(ctx, NULL, scratch.offset, (int32_t)info.val)) {
|
||||
/* GFX10.3+ can disable both SADDR and ADDR. */
|
||||
instr->operands[i] = Operand(instr->operands[i].regClass());
|
||||
scratch.offset += (int32_t)info.val;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue