From ee9ccd7ac523243408b8bcde9c7896a2aee1d7c2 Mon Sep 17 00:00:00 2001 From: Qiang Yu Date: Mon, 26 Dec 2022 10:03:59 +0800 Subject: [PATCH] radeonsi: only init llvm output when needed. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are the cases that still use nir_store_output. Reviewed-by: Marek Olšák Signed-off-by: Qiang Yu Part-of: --- src/gallium/drivers/radeonsi/si_shader_llvm.c | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 8e40c0cacf0..425cc281b21 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -1000,23 +1000,28 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade ctx->abi.disable_aniso_single_level = true; ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord; - unsigned num_outputs = info->num_outputs; - /* need extra output to hold primitive id added by nir lower */ - if (ctx->stage <= MESA_SHADER_GEOMETRY && - ctx->shader->key.ge.mono.u.vs_export_prim_id) - num_outputs++; + bool ls_need_output = + ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls && + shader->key.ge.opt.same_patch_vertices; - for (unsigned i = 0; i < num_outputs; i++) { - LLVMTypeRef type = ctx->ac.f32; + bool tcs_need_output = + ctx->stage == MESA_SHADER_TESS_CTRL && info->tessfactors_are_def_in_all_invocs; - /* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. 
*/ - if (nir->info.stage == MESA_SHADER_FRAGMENT && - nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16) - type = ctx->ac.f16; + bool ps_need_output = ctx->stage == MESA_SHADER_FRAGMENT; - for (unsigned j = 0; j < 4; j++) { - ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, type, ""); - ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16; + if (ls_need_output || tcs_need_output || ps_need_output) { + for (unsigned i = 0; i < info->num_outputs; i++) { + LLVMTypeRef type = ctx->ac.f32; + + /* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */ + if (nir->info.stage == MESA_SHADER_FRAGMENT && + nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16) + type = ctx->ac.f16; + + for (unsigned j = 0; j < 4; j++) { + ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, type, ""); + ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16; + } } }