mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-15 08:50:28 +01:00
aco: Keep VGPR destinations for uniform shared loads when beneficial.
When the result of these loads is only used by cross-lane instructions, it is beneficial to use a VGPR destination. This is because this allows to put the s_waitcnt further down, which decreases latency. Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Tony Wasserka <tony.wasserka@gmx.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11072>
This commit is contained in:
parent
ce141e4c5f
commit
b4e22eb482
1 changed files with 35 additions and 1 deletions
|
|
@ -89,6 +89,31 @@ is_block_reachable(nir_function_impl *impl, nir_block *known_reachable, nir_bloc
|
|||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
only_used_by_readlane_or_phi(nir_dest *dest)
|
||||
{
|
||||
nir_src *src = list_first_entry(&dest->ssa.uses, nir_src, use_link);
|
||||
|
||||
switch (src->parent_instr->type) {
|
||||
case nir_instr_type_alu: {
|
||||
nir_alu_instr *alu = nir_instr_as_alu(src->parent_instr);
|
||||
if (alu->op == nir_op_unpack_64_2x32_split_x || alu->op == nir_op_unpack_64_2x32_split_y)
|
||||
return only_used_by_readlane_or_phi(&alu->dest.dest);
|
||||
return false;
|
||||
}
|
||||
case nir_instr_type_intrinsic: {
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(src->parent_instr);
|
||||
return intrin->intrinsic == nir_intrinsic_read_invocation ||
|
||||
intrin->intrinsic == nir_intrinsic_read_first_invocation ||
|
||||
intrin->intrinsic == nir_intrinsic_lane_permute_16_amd;
|
||||
}
|
||||
case nir_instr_type_phi:
|
||||
return only_used_by_readlane_or_phi(&nir_instr_as_phi(src->parent_instr)->dest);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* If one side of a divergent IF ends in a branch and the other doesn't, we
|
||||
* might have to emit the contents of the side without the branch at the merge
|
||||
* block instead. This is so that we can use any SGPR live-out of the side
|
||||
|
|
@ -830,6 +855,16 @@ void init_context(isel_context *ctx, nir_shader *shader)
|
|||
case nir_intrinsic_bvh64_intersect_ray_amd:
|
||||
type = RegType::vgpr;
|
||||
break;
|
||||
case nir_intrinsic_load_shared:
|
||||
/* When the result of these loads is only used by cross-lane instructions,
|
||||
* it is beneficial to use a VGPR destination. This is because this allows
|
||||
* to put the s_waitcnt further down, which decreases latency.
|
||||
*/
|
||||
if (only_used_by_readlane_or_phi(&intrinsic->dest)) {
|
||||
type = RegType::vgpr;
|
||||
break;
|
||||
}
|
||||
FALLTHROUGH;
|
||||
case nir_intrinsic_shuffle:
|
||||
case nir_intrinsic_quad_broadcast:
|
||||
case nir_intrinsic_quad_swap_horizontal:
|
||||
|
|
@ -844,7 +879,6 @@ void init_context(isel_context *ctx, nir_shader *shader)
|
|||
case nir_intrinsic_load_ssbo:
|
||||
case nir_intrinsic_load_global:
|
||||
case nir_intrinsic_vulkan_resource_index:
|
||||
case nir_intrinsic_load_shared:
|
||||
case nir_intrinsic_get_ssbo_size:
|
||||
type = nir_dest_is_divergent(intrinsic->dest) ? RegType::vgpr : RegType::sgpr;
|
||||
break;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue