mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-08 04:48:08 +02:00
aco: workaround load tearing for load_shared2_amd
This probably has the same issue as load_shared.
Signed-off-by: Rhys Perry <pendingchaos02@gmail.com>
Fixes: 04956d54ce ("aco: force uniform result for LDS load with uniform address if it can be non uniform")
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/37417>
This commit is contained in:
parent
810d4ff299
commit
8931672eef
1 changed files with 9 additions and 2 deletions
|
|
@ -3215,11 +3215,18 @@ visit_access_shared2_amd(isel_context* ctx, nir_intrinsic_instr* instr)
|
|||
if (!is_store) {
|
||||
Temp dst = get_ssa_temp(ctx, &instr->def);
|
||||
if (dst.type() == RegType::sgpr) {
|
||||
/* Similar to load_shared. */
|
||||
bool readfirstlane_for_uniform = ctx->options->gfx_level >= GFX10 &&
|
||||
ctx->program->wave_size == 64 &&
|
||||
ctx->program->workgroup_size > 64;
|
||||
|
||||
emit_split_vector(ctx, ds->definitions[0].getTemp(), dst.size());
|
||||
Temp comp[4];
|
||||
/* Use scalar v_readfirstlane_b32 for better 32-bit copy propagation */
|
||||
for (unsigned i = 0; i < dst.size(); i++)
|
||||
comp[i] = bld.as_uniform(emit_extract_vector(ctx, ds->definitions[0].getTemp(), i, v1));
|
||||
for (unsigned i = 0; i < dst.size(); i++) {
|
||||
Temp val = emit_extract_vector(ctx, ds->definitions[0].getTemp(), i, v1);
|
||||
comp[i] = emit_vector_as_uniform(ctx, val, bld.tmp(s1), readfirstlane_for_uniform);
|
||||
}
|
||||
if (is64bit) {
|
||||
Temp comp0 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[0], comp[1]);
|
||||
Temp comp1 = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), comp[2], comp[3]);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue