ir3: add codegen for movs

movs is just nir_intrinsic_read_invocation so this is a matter of
disabling the current lowering to nir_intrinsic_read_invocation_cond_ir3
and adding lowering to movs.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32624>
This commit is contained in:
Job Noorman 2025-06-26 11:35:55 +02:00 committed by Marge Bot
parent d94bea85b8
commit bf203fbf20
7 changed files with 66 additions and 1 deletions

View file

@ -135,6 +135,7 @@ struct fd_dev_info {
bool has_getfiberid;
bool mov_half_shared_quirk;
bool has_movs;
bool has_dp2acc;
bool has_dp4acc;

View file

@ -425,6 +425,7 @@ a6xx_gen4 = A6XXProps(
has_lpac = True,
has_legacy_pipeline_shading_rate = True,
has_getfiberid = True,
has_movs = True,
has_dp2acc = True,
has_dp4acc = True,
enable_lrz_fast_clear = True,
@ -900,6 +901,7 @@ a7xx_base = A6XXProps(
has_sample_locations = True,
has_lpac = True,
has_getfiberid = True,
has_movs = True,
has_dp2acc = True,
has_dp4acc = True,
enable_lrz_fast_clear = True,

View file

@ -2657,6 +2657,29 @@ ir3_COV_rpt(struct ir3_builder *build, unsigned nrpt,
return dst;
}
/*
 * Build an OPC_MOVS instruction with one destination; movs is what
 * nir_intrinsic_read_invocation is emitted as.
 *
 * src is always the first SSA source. How invocation is attached depends on
 * writes_addr0(invocation):
 *  - true:  it is attached with ir3_instr_set_address() and the instruction
 *           is built with a single source;
 *  - false: it is added as a second SSA source.
 *
 * The destination is flagged with type_flags(type) plus IR3_REG_SHARED, and
 * both cat1.src_type and cat1.dst_type are set to type.
 *
 * Returns the newly built instruction.
 */
static inline struct ir3_instruction *
ir3_MOVS(struct ir3_builder *build, struct ir3_instruction *src,
         struct ir3_instruction *invocation, type_t type)
{
   const bool invocation_is_addr = writes_addr0(invocation);

   /* Only a non-address invocation needs a source slot of its own. */
   unsigned num_srcs = 2;
   if (invocation_is_addr) {
      num_srcs = 1;
   }

   struct ir3_instruction *movs =
      ir3_build_instr(build, OPC_MOVS, 1, num_srcs);

   ir3_register_flags dst_flags = type_flags(type);
   dst_flags |= IR3_REG_SHARED;
   __ssa_dst(movs)->flags |= dst_flags;

   /* The value being read comes first; the invocation follows it. */
   __ssa_src(movs, src, 0);

   if (!invocation_is_addr) {
      __ssa_src(movs, invocation, 0);
   } else {
      ir3_instr_set_address(movs, invocation);
   }

   /* movs does not convert: source and destination share one type. */
   movs->cat1.dst_type = type;
   movs->cat1.src_type = type;

   return movs;
}
static inline struct ir3_instruction *
ir3_MOVMSK(struct ir3_builder *build, unsigned components)
{

View file

@ -217,6 +217,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
compiler->has_getfiberid = dev_info->a6xx.has_getfiberid;
compiler->mov_half_shared_quirk = dev_info->a6xx.mov_half_shared_quirk;
compiler->has_movs = dev_info->a6xx.has_movs;
compiler->has_dp2acc = dev_info->a6xx.has_dp2acc;
compiler->has_dp4acc = dev_info->a6xx.has_dp4acc;

View file

@ -210,6 +210,9 @@ struct ir3_compiler {
/* Whether half register shared->non-shared moves are broken. */
bool mov_half_shared_quirk;
/* Whether movs is supported for subgroupBroadcast. */
bool has_movs;
/* True if the shfl instruction is supported. Needed for subgroup rotate and
* (more efficient) shuffle.
*/

View file

@ -2604,7 +2604,21 @@ apply_mov_half_shared_quirk(struct ir3_context *ctx,
* adding an extra mov here so that the original destination stays full.
*/
if (src->dsts[0]->flags & IR3_REG_HALF) {
dst = ir3_MOV(&ctx->build, dst, TYPE_U32);
if (dst->opc == OPC_MOVS) {
/* For movs, we have to fix up its dst_type and then convert back to
* its original dst_type. Note that this might generate movs.u8u32
* which doesn't work correctly, but since we convert back using
* cov.u32u8, the end result will be correct.
*/
type_t dst_type = dst->cat1.dst_type;
assert(type_uint(dst_type));
dst->cat1.dst_type = TYPE_U32;
dst->dsts[0]->flags &= ~IR3_REG_HALF;
dst = ir3_COV(&ctx->build, dst, dst->cat1.dst_type, dst_type);
} else {
dst = ir3_MOV(&ctx->build, dst, TYPE_U32);
}
if (!ctx->compiler->has_scalar_alu)
dst->dsts[0]->flags &= ~IR3_REG_SHARED;
}
@ -3166,6 +3180,25 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
break;
}
case nir_intrinsic_read_invocation: {
struct ir3_instruction *const *srcs = ir3_get_src(ctx, &intr->src[0]);
nir_src *nir_invocation = &intr->src[1];
struct ir3_instruction *invocation = ir3_get_src(ctx, nir_invocation)[0];
if (!nir_src_is_const(*nir_invocation)) {
invocation = ir3_get_addr0(ctx, invocation, 1);
}
for (unsigned i = 0; i < intr->def.num_components; i++) {
dst[i] = ir3_MOVS(b, srcs[i], invocation,
type_uint_size(intr->def.bit_size));
dst[i] = apply_mov_half_shared_quirk(ctx, srcs[i], dst[i]);
}
create_rpt = true;
break;
}
case nir_intrinsic_read_first_invocation: {
struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0];
dst[0] = ir3_READ_FIRST_MACRO(b, src, 0);

View file

@ -745,6 +745,8 @@ ir3_nir_lower_subgroups_filter(const nir_instr *instr, const void *data)
default:
return intrin->def.num_components > 1;
}
case nir_intrinsic_read_invocation:
return !compiler->has_movs;
default:
return true;
}