broadcom/compiler: convert 2x32 global operations to scalar variants

Acked-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29711>
Author: Karol Herbst — 2024-06-14 20:07:45 +02:00 (committed by Marge Bot)
parent 9827cfe49e
commit a2eff2b9f9
4 changed files with 45 additions and 14 deletions

View file

@ -218,13 +218,11 @@ v3d_general_tmu_op(nir_intrinsic_instr *instr)
case nir_intrinsic_load_shared:
case nir_intrinsic_load_scratch:
case nir_intrinsic_load_global:
case nir_intrinsic_load_global_2x32:
case nir_intrinsic_load_global_constant:
case nir_intrinsic_store_ssbo:
case nir_intrinsic_store_shared:
case nir_intrinsic_store_scratch:
case nir_intrinsic_store_global:
case nir_intrinsic_store_global_2x32:
return V3D_TMU_OP_REGULAR;
case nir_intrinsic_ssbo_atomic:
@ -232,9 +230,7 @@ v3d_general_tmu_op(nir_intrinsic_instr *instr)
case nir_intrinsic_shared_atomic:
case nir_intrinsic_shared_atomic_swap:
case nir_intrinsic_global_atomic:
case nir_intrinsic_global_atomic_2x32:
case nir_intrinsic_global_atomic_swap:
case nir_intrinsic_global_atomic_swap_2x32:
return v3d_general_tmu_op_for_atomic(instr);
default:
@ -515,8 +511,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
bool atomic_add_replaced =
(instr->intrinsic == nir_intrinsic_ssbo_atomic ||
instr->intrinsic == nir_intrinsic_shared_atomic ||
instr->intrinsic == nir_intrinsic_global_atomic ||
instr->intrinsic == nir_intrinsic_global_atomic_2x32) &&
instr->intrinsic == nir_intrinsic_global_atomic) &&
nir_intrinsic_atomic_op(instr) == nir_atomic_op_iadd &&
(tmu_op == V3D_TMU_OP_WRITE_AND_READ_INC ||
tmu_op == V3D_TMU_OP_WRITE_OR_READ_DEC);
@ -524,8 +519,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
bool is_store = (instr->intrinsic == nir_intrinsic_store_ssbo ||
instr->intrinsic == nir_intrinsic_store_scratch ||
instr->intrinsic == nir_intrinsic_store_shared ||
instr->intrinsic == nir_intrinsic_store_global ||
instr->intrinsic == nir_intrinsic_store_global_2x32);
instr->intrinsic == nir_intrinsic_store_global);
bool is_load = (instr->intrinsic == nir_intrinsic_load_uniform ||
instr->intrinsic == nir_intrinsic_load_ubo ||
@ -533,7 +527,6 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
instr->intrinsic == nir_intrinsic_load_scratch ||
instr->intrinsic == nir_intrinsic_load_shared ||
instr->intrinsic == nir_intrinsic_load_global ||
instr->intrinsic == nir_intrinsic_load_global_2x32 ||
instr->intrinsic == nir_intrinsic_load_global_constant);
if (!is_load)
@ -552,7 +545,6 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
instr->intrinsic == nir_intrinsic_load_scratch ||
instr->intrinsic == nir_intrinsic_load_shared ||
instr->intrinsic == nir_intrinsic_load_global ||
instr->intrinsic == nir_intrinsic_load_global_2x32 ||
instr->intrinsic == nir_intrinsic_load_global_constant ||
atomic_add_replaced) {
offset_src = 0 + has_index;
@ -3403,7 +3395,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
case nir_intrinsic_load_global:
case nir_intrinsic_load_global_constant:
case nir_intrinsic_load_global_2x32:
ntq_emit_tmu_general(c, instr, false, true);
c->has_general_tmu_load = true;
break;
@ -3426,11 +3417,8 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_store_global:
case nir_intrinsic_store_global_2x32:
case nir_intrinsic_global_atomic:
case nir_intrinsic_global_atomic_2x32:
case nir_intrinsic_global_atomic_swap:
case nir_intrinsic_global_atomic_swap_2x32:
ntq_emit_tmu_general(c, instr, false, true);
break;

View file

@ -1200,6 +1200,7 @@ bool v3d_nir_lower_logic_ops(nir_shader *s, struct v3d_compile *c);
bool v3d_nir_lower_scratch(nir_shader *s);
bool v3d_nir_lower_txf_ms(nir_shader *s);
bool v3d_nir_lower_image_load_store(nir_shader *s, struct v3d_compile *c);
bool v3d_nir_lower_global_2x32(nir_shader *s);
bool v3d_nir_lower_load_store_bitsize(nir_shader *s);
void v3d_vir_emit_tex(struct v3d_compile *c, nir_tex_instr *instr);

View file

@ -300,6 +300,38 @@ v3d_size_align_cb(nir_intrinsic_op intrin, uint8_t bytes,
};
}
/* Map a 2x32 (vec2 address) global memory intrinsic to its scalar
 * single-address counterpart. Ops that are not one of the four 2x32
 * global variants are returned unchanged, which lets the caller use
 * "op == intr->intrinsic" as a cheap "nothing to lower" check.
 */
static nir_intrinsic_op
convert_global_2x32_to_scalar(nir_intrinsic_op op)
{
switch (op) {
case nir_intrinsic_global_atomic_2x32:
return nir_intrinsic_global_atomic;
case nir_intrinsic_global_atomic_swap_2x32:
return nir_intrinsic_global_atomic_swap;
case nir_intrinsic_load_global_2x32:
return nir_intrinsic_load_global;
case nir_intrinsic_store_global_2x32:
return nir_intrinsic_store_global;
default:
return op;
}
}
/* Per-intrinsic callback for v3d_nir_lower_global_2x32: rewrites a 2x32
 * global intrinsic into its scalar variant by keeping only the first
 * component of the address source.
 *
 * NOTE(review): taking nir_channel(..., 0) assumes the x component of the
 * 2x32 address holds the (low) 32-bit address the hardware uses — confirm
 * against how the driver builds 2x32 addresses.
 *
 * Returns true when the instruction was changed, false otherwise.
 */
static bool
lower_global_2x32(nir_builder *b, nir_intrinsic_instr *intr, void *data)
{
nir_intrinsic_op op = convert_global_2x32_to_scalar(intr->intrinsic);
if (op == intr->intrinsic)
return false;
/* Insert the channel-select right before the instruction being rewritten. */
b->cursor = nir_before_instr(&intr->instr);
nir_src *addr_src = nir_get_io_offset_src(intr);
nir_src_rewrite(addr_src, nir_channel(b, addr_src->ssa, 0));
/* Retag in place; all other sources and the destination stay valid. */
intr->intrinsic = op;
return true;
}
bool
v3d_nir_lower_load_store_bitsize(nir_shader *s)
{
@ -317,3 +349,12 @@ v3d_nir_lower_load_store_bitsize(nir_shader *s)
res |= nir_lower_mem_access_bit_sizes(s, &lower_options);
return res;
}
/* Shader-level entry point: run lower_global_2x32 over every intrinsic in
 * the shader. The pass only rewrites sources of existing instructions
 * (plus one inserted channel-select), so block-index and dominance
 * metadata are preserved. Returns true if any instruction was changed.
 */
bool
v3d_nir_lower_global_2x32(nir_shader *s)
{
return nir_shader_intrinsics_pass(s, lower_global_2x32,
nir_metadata_block_index |
nir_metadata_dominance,
NULL);
}

View file

@ -1708,6 +1708,7 @@ v3d_attempt_compile(struct v3d_compile *c)
NIR_PASS(_, c->s, nir_lower_robust_access, &opts);
}
NIR_PASS(_, c->s, v3d_nir_lower_global_2x32);
NIR_PASS(_, c->s, nir_lower_wrmasks, should_split_wrmask, c->s);
NIR_PASS(_, c->s, v3d_nir_lower_load_store_bitsize);