ir3: Plumb through descriptor prefetch intrinsics

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29873>
This commit is contained in:
Connor Abbott 2024-06-24 07:28:42 -04:00 committed by Marge Bot
parent b39b82dfbd
commit 45a57fa735
5 changed files with 86 additions and 6 deletions

View file

@@ -1402,6 +1402,11 @@ intrinsic("inclusive_scan_clusters_ir3", dest_comp=1, src_comp=[1],
intrinsic("exclusive_scan_clusters_ir3", dest_comp=1, src_comp=[1, 1],
bit_sizes=src0, indices=[REDUCTION_OP])
# IR3-specific intrinsics for prefetching descriptors in preambles.
intrinsic("prefetch_sam_ir3", [1, 1], flags=[CAN_REORDER])
intrinsic("prefetch_tex_ir3", [1], flags=[CAN_REORDER])
intrinsic("prefetch_ubo_ir3", [1], flags=[CAN_REORDER])
# Intrinsics used by the Midgard/Bifrost blend pipeline. These are defined
# within a blend shader to read/write the raw value from the tile buffer,
# without applying any format conversion in the process. If the shader needs

View file

@@ -1151,10 +1151,11 @@ is_load(struct ir3_instruction *instr)
case OPC_LDP:
case OPC_L2G:
case OPC_LDLW:
case OPC_LDC:
case OPC_LDLV:
/* probably some others too.. */
return true;
case OPC_LDC:
return instr->dsts_count > 0;
default:
return false;
}
@@ -1185,7 +1186,6 @@ uses_helpers(struct ir3_instruction *instr)
{
switch (instr->opc) {
/* These require helper invocations to be present */
case OPC_SAM:
case OPC_SAMB:
case OPC_GETLOD:
case OPC_DSX:
@@ -1201,6 +1201,10 @@ uses_helpers(struct ir3_instruction *instr)
case OPC_META_TEX_PREFETCH:
return true;
/* sam requires helper invocations except for dummy prefetch instructions */
case OPC_SAM:
return instr->dsts_count != 0;
/* Subgroup operations don't require helper invocations to be present, but
* will use helper invocations if they are present.
*/
@@ -2741,7 +2745,7 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask,
if (flags & IR3_INSTR_S2EN) {
nreg++;
}
if (src0) {
if (src0 || opc == OPC_SAM) {
nreg++;
}
if (src1) {
@@ -2756,6 +2760,12 @@ ir3_SAM(struct ir3_block *block, opc_t opc, type_t type, unsigned wrmask,
}
if (src0) {
__ssa_src(sam, src0, 0);
} else if (opc == OPC_SAM) {
/* Create a dummy shared source for the coordinate, for the prefetch
* case. It needs to be shared so that we don't accidentally disable early
* preamble, and this is what the blob does.
*/
ir3_src_create(sam, regid(48, 0), IR3_REG_SHARED);
}
if (src1) {
__ssa_src(sam, src1, 0);

View file

@@ -3042,6 +3042,47 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
load->push_consts.dst_base + load->push_consts.src_size, 4));
break;
}
case nir_intrinsic_prefetch_sam_ir3: {
struct tex_src_info info =
get_bindless_samp_src(ctx, &intr->src[0], &intr->src[1]);
struct ir3_instruction *sam =
emit_sam(ctx, OPC_SAM, info, TYPE_F32, 0b1111, NULL, NULL);
sam->dsts_count = 0;
array_insert(ctx->block, ctx->block->keeps, sam);
break;
}
case nir_intrinsic_prefetch_tex_ir3: {
struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0];
struct ir3_instruction *resinfo = ir3_RESINFO(b, idx, 0);
resinfo->cat6.iim_val = 1;
resinfo->cat6.d = 1;
resinfo->cat6.type = TYPE_U32;
resinfo->cat6.typed = false;
ir3_handle_bindless_cat6(resinfo, intr->src[0]);
if (resinfo->flags & IR3_INSTR_B)
ctx->so->bindless_tex = true;
resinfo->dsts_count = 0;
array_insert(ctx->block, ctx->block->keeps, resinfo);
break;
}
case nir_intrinsic_prefetch_ubo_ir3: {
struct ir3_instruction *offset = create_immed(ctx->block, 0);
struct ir3_instruction *idx = ir3_get_src(ctx, &intr->src[0])[0];
struct ir3_instruction *ldc = ir3_LDC(b, idx, 0, offset, 0);
ldc->cat6.iim_val = 1;
ldc->cat6.type = TYPE_U32;
ir3_handle_bindless_cat6(ldc, intr->src[0]);
if (ldc->flags & IR3_INSTR_B)
ctx->so->bindless_ubo = true;
ldc->dsts_count = 0;
array_insert(ctx->block, ctx->block->keeps, ldc);
break;
}
default:
ir3_context_error(ctx, "Unhandled intrinsic type: %s\n",
nir_intrinsic_infos[intr->intrinsic].name);

View file

@@ -603,11 +603,11 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
}
}
if (is_tex_or_prefetch(n)) {
if (is_tex_or_prefetch(n) && n->dsts_count > 0) {
regmask_set(&state->needs_sy, n->dsts[0]);
if (n->opc == OPC_META_TEX_PREFETCH)
ctx->has_tex_prefetch = true;
} else if (n->opc == OPC_RESINFO) {
} else if (n->opc == OPC_RESINFO && n->dsts_count > 0) {
regmask_set(&state->needs_ss, n->dsts[0]);
ir3_NOP(block)->flags |= IR3_INSTR_SS;
last_input_needs_ss = false;
@@ -791,6 +791,25 @@ apply_fine_deriv_macro(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
return true;
}
/* Some instructions can take a dummy destination of r63.x, which we model as it
* not having a destination in the IR to avoid having special code to handle
* this. Insert the dummy destination after everything else is done.
*/
static bool
expand_dummy_dests(struct ir3_block *block)
{
foreach_instr (n, &block->instr_list) {
if ((n->opc == OPC_SAM || n->opc == OPC_LDC || n->opc == OPC_RESINFO) &&
n->dsts_count == 0) {
struct ir3_register *dst = ir3_dst_create(n, INVALID_REG, 0);
/* Copy the blob's writemask */
if (n->opc == OPC_SAM)
dst->wrmask = 0b1111;
}
}
return true;
}
static void
apply_push_consts_load_macro(struct ir3_legalize_ctx *ctx,
struct ir3_block *block)
@@ -1720,6 +1739,10 @@ ir3_legalize(struct ir3 *ir, struct ir3_shader_variant *so, int *max_bary)
so->compiler->gen >= 6)
helper_sched(ctx, ir, so);
foreach_block (block, &ir->block_list) {
progress |= expand_dummy_dests(block);
}
ir3_count_instructions(ir);
resolve_jumps(ir);

View file

@@ -354,7 +354,8 @@ validate_instr(struct ir3_validate_ctx *ctx, struct ir3_instruction *instr)
switch (instr->opc) {
case OPC_RESINFO:
case OPC_RESFMT:
validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
if (instr->dsts_count > 0)
validate_reg_size(ctx, instr->dsts[0], instr->cat6.type);
validate_reg_size(ctx, instr->srcs[0], instr->cat6.type);
break;
case OPC_L2G: