mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-02-23 07:30:30 +01:00
ir3: add codegen for movs
movs is just nir_intrinsic_read_invocation, so this is a matter of disabling the current lowering to nir_intrinsic_read_invocation_cond_ir3 and adding lowering to movs instead.

Signed-off-by: Job Noorman <jnoorman@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32624>
This commit is contained in:
parent
d94bea85b8
commit
bf203fbf20
7 changed files with 66 additions and 1 deletion
|
|
@@ -135,6 +135,7 @@ struct fd_dev_info {
|
|||
|
||||
bool has_getfiberid;
|
||||
bool mov_half_shared_quirk;
|
||||
bool has_movs;
|
||||
|
||||
bool has_dp2acc;
|
||||
bool has_dp4acc;
|
||||
|
|
|
|||
|
|
@@ -425,6 +425,7 @@ a6xx_gen4 = A6XXProps(
|
|||
has_lpac = True,
|
||||
has_legacy_pipeline_shading_rate = True,
|
||||
has_getfiberid = True,
|
||||
has_movs = True,
|
||||
has_dp2acc = True,
|
||||
has_dp4acc = True,
|
||||
enable_lrz_fast_clear = True,
|
||||
|
|
@@ -900,6 +901,7 @@ a7xx_base = A6XXProps(
|
|||
has_sample_locations = True,
|
||||
has_lpac = True,
|
||||
has_getfiberid = True,
|
||||
has_movs = True,
|
||||
has_dp2acc = True,
|
||||
has_dp4acc = True,
|
||||
enable_lrz_fast_clear = True,
|
||||
|
|
|
|||
|
|
@@ -2657,6 +2657,29 @@ ir3_COV_rpt(struct ir3_builder *build, unsigned nrpt,
|
|||
return dst;
|
||||
}
|
||||
|
||||
static inline struct ir3_instruction *
|
||||
ir3_MOVS(struct ir3_builder *build, struct ir3_instruction *src,
|
||||
struct ir3_instruction *invocation, type_t type)
|
||||
{
|
||||
bool use_a0 = writes_addr0(invocation);
|
||||
struct ir3_instruction *instr =
|
||||
ir3_build_instr(build, OPC_MOVS, 1, use_a0 ? 1 : 2);
|
||||
ir3_register_flags flags = type_flags(type);
|
||||
|
||||
__ssa_dst(instr)->flags |= flags | IR3_REG_SHARED;
|
||||
__ssa_src(instr, src, 0);
|
||||
|
||||
if (use_a0) {
|
||||
ir3_instr_set_address(instr, invocation);
|
||||
} else {
|
||||
__ssa_src(instr, invocation, 0);
|
||||
}
|
||||
|
||||
instr->cat1.src_type = type;
|
||||
instr->cat1.dst_type = type;
|
||||
return instr;
|
||||
}
|
||||
|
||||
static inline struct ir3_instruction *
|
||||
ir3_MOVMSK(struct ir3_builder *build, unsigned components)
|
||||
{
|
||||
|
|
|
|||
|
|
@@ -217,6 +217,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
|
|||
|
||||
compiler->has_getfiberid = dev_info->a6xx.has_getfiberid;
|
||||
compiler->mov_half_shared_quirk = dev_info->a6xx.mov_half_shared_quirk;
|
||||
compiler->has_movs = dev_info->a6xx.has_movs;
|
||||
|
||||
compiler->has_dp2acc = dev_info->a6xx.has_dp2acc;
|
||||
compiler->has_dp4acc = dev_info->a6xx.has_dp4acc;
|
||||
|
|
|
|||
|
|
@@ -210,6 +210,9 @@ struct ir3_compiler {
|
|||
/* Whether half register shared->non-shared moves are broken. */
|
||||
bool mov_half_shared_quirk;
|
||||
|
||||
/* Whether movs is supported for subgroupBroadcast. */
|
||||
bool has_movs;
|
||||
|
||||
/* True if the shfl instruction is supported. Needed for subgroup rotate and
|
||||
* (more efficient) shuffle.
|
||||
*/
|
||||
|
|
|
|||
|
|
@@ -2604,7 +2604,21 @@ apply_mov_half_shared_quirk(struct ir3_context *ctx,
|
|||
* adding an extra mov here so that the original destination stays full.
|
||||
*/
|
||||
if (src->dsts[0]->flags & IR3_REG_HALF) {
|
||||
dst = ir3_MOV(&ctx->build, dst, TYPE_U32);
|
||||
if (dst->opc == OPC_MOVS) {
|
||||
/* For movs, we have to fix up its dst_type and then convert back to
|
||||
* its original dst_type. Note that this might generate movs.u8u32
|
||||
* which doesn't work correctly, but since we convert back using
|
||||
* cov.u32u8, the end result will be correct.
|
||||
*/
|
||||
type_t dst_type = dst->cat1.dst_type;
|
||||
assert(type_uint(dst_type));
|
||||
|
||||
dst->cat1.dst_type = TYPE_U32;
|
||||
dst->dsts[0]->flags &= ~IR3_REG_HALF;
|
||||
dst = ir3_COV(&ctx->build, dst, dst->cat1.dst_type, dst_type);
|
||||
} else {
|
||||
dst = ir3_MOV(&ctx->build, dst, TYPE_U32);
|
||||
}
|
||||
if (!ctx->compiler->has_scalar_alu)
|
||||
dst->dsts[0]->flags &= ~IR3_REG_SHARED;
|
||||
}
|
||||
|
|
@@ -3166,6 +3180,25 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
|||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_read_invocation: {
|
||||
struct ir3_instruction *const *srcs = ir3_get_src(ctx, &intr->src[0]);
|
||||
nir_src *nir_invocation = &intr->src[1];
|
||||
struct ir3_instruction *invocation = ir3_get_src(ctx, nir_invocation)[0];
|
||||
|
||||
if (!nir_src_is_const(*nir_invocation)) {
|
||||
invocation = ir3_get_addr0(ctx, invocation, 1);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < intr->def.num_components; i++) {
|
||||
dst[i] = ir3_MOVS(b, srcs[i], invocation,
|
||||
type_uint_size(intr->def.bit_size));
|
||||
dst[i] = apply_mov_half_shared_quirk(ctx, srcs[i], dst[i]);
|
||||
}
|
||||
|
||||
create_rpt = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_read_first_invocation: {
|
||||
struct ir3_instruction *src = ir3_get_src(ctx, &intr->src[0])[0];
|
||||
dst[0] = ir3_READ_FIRST_MACRO(b, src, 0);
|
||||
|
|
|
|||
|
|
@@ -745,6 +745,8 @@ ir3_nir_lower_subgroups_filter(const nir_instr *instr, const void *data)
|
|||
default:
|
||||
return intrin->def.num_components > 1;
|
||||
}
|
||||
case nir_intrinsic_read_invocation:
|
||||
return !compiler->has_movs;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue