mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-01-04 11:20:20 +01:00
radv/ac: setup mrt exports then export them in one go. (v2)
Noticed while looking at Sascha Willems deferred shaders.
This is a bit of an llvm workaround, llvm was producing this:
v_cvt_pkrtz_f16_f32_e64 v4, v7, v8 ; D2960004 00021107
v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0 ; D2960006 0001E509
s_waitcnt vmcnt(0) ; BF8C0F70
exp mrt0 v4, v4, v6, v6 compr ; C400040F 00000604
s_waitcnt expcnt(0) ; BF8C0F0F
v_cvt_pkrtz_f16_f32_e64 v4, v12, v5 ; D2960004 00020B0C
v_cvt_pkrtz_f16_f32_e64 v5, v14, 1.0 ; D2960005 0001E50E
exp mrt1 v4, v4, v5, v5 compr ; C400041F 00000504
s_waitcnt expcnt(0) ; BF8C0F0F
v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300
v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702
exp mrt2 v0, v0, v1, v1 done compr vm ; C4001C2F 00000100
After this change:
v_cvt_pkrtz_f16_f32_e64 v4, v7, v8 ; D2960004 00021107
s_waitcnt vmcnt(0) ; BF8C0F70
v_cvt_pkrtz_f16_f32_e64 v0, v0, v1 ; D2960000 00020300
v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0 ; D2960006 0001E509
v_cvt_pkrtz_f16_f32_e64 v5, v12, v5 ; D2960005 00020B0C
v_cvt_pkrtz_f16_f32_e64 v7, v14, 1.0 ; D2960007 0001E50E
exp mrt0 v4, v4, v6, v6 compr ; C400040F 00000604
v_cvt_pkrtz_f16_f32_e64 v1, v2, v3 ; D2960001 00020702
exp mrt1 v5, v5, v7, v7 compr ; C400041F 00000705
exp mrt2 v0, v0, v1, v1 done compr vm ; C4001C2F 00000100
No waitcnt for exports are emitted.
v2: fixup index->mrt mapping (Bas).
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
parent
b2cedb3ea9
commit
7f77554b5b
1 changed files with 19 additions and 15 deletions
|
|
@ -5572,24 +5572,22 @@ handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
|
|||
write_tess_factors(ctx);
|
||||
}
|
||||
|
||||
static void
|
||||
static bool
|
||||
si_export_mrt_color(struct nir_to_llvm_context *ctx,
|
||||
LLVMValueRef *color, unsigned param, bool is_last)
|
||||
LLVMValueRef *color, unsigned param, bool is_last,
|
||||
struct ac_export_args *args)
|
||||
{
|
||||
|
||||
struct ac_export_args args;
|
||||
|
||||
/* Export */
|
||||
si_llvm_init_export_args(ctx, color, param,
|
||||
&args);
|
||||
args);
|
||||
|
||||
if (is_last) {
|
||||
args.valid_mask = 1; /* whether the EXEC mask is valid */
|
||||
args.done = 1; /* DONE bit */
|
||||
} else if (!args.enabled_channels)
|
||||
return; /* unnecessary NULL export */
|
||||
args->valid_mask = 1; /* whether the EXEC mask is valid */
|
||||
args->done = 1; /* DONE bit */
|
||||
} else if (!args->enabled_channels)
|
||||
return false; /* unnecessary NULL export */
|
||||
|
||||
ac_build_export(&ctx->ac, &args);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -5639,6 +5637,7 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
|
|||
{
|
||||
unsigned index = 0;
|
||||
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
|
||||
struct ac_export_args color_args[8];
|
||||
|
||||
for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
|
||||
LLVMValueRef values[4];
|
||||
|
|
@ -5667,15 +5666,20 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
|
|||
if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
|
||||
last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
|
||||
|
||||
si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
|
||||
index++;
|
||||
bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_DATA0), last, &color_args[index]);
|
||||
if (ret)
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < index; i++)
|
||||
ac_build_export(&ctx->ac, &color_args[i]);
|
||||
if (depth || stencil || samplemask)
|
||||
si_export_mrt_z(ctx, depth, stencil, samplemask);
|
||||
else if (!index)
|
||||
si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
|
||||
else if (!index) {
|
||||
si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]);
|
||||
ac_build_export(&ctx->ac, &color_args[0]);
|
||||
}
|
||||
|
||||
ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue