diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c index 8e40c0cacf0..425cc281b21 100644 --- a/src/gallium/drivers/radeonsi/si_shader_llvm.c +++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c @@ -1000,23 +1000,28 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade ctx->abi.disable_aniso_single_level = true; ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord; - unsigned num_outputs = info->num_outputs; - /* need extra output to hold primitive id added by nir lower */ - if (ctx->stage <= MESA_SHADER_GEOMETRY && - ctx->shader->key.ge.mono.u.vs_export_prim_id) - num_outputs++; + bool ls_need_output = + ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls && + shader->key.ge.opt.same_patch_vertices; - for (unsigned i = 0; i < num_outputs; i++) { - LLVMTypeRef type = ctx->ac.f32; + bool tcs_need_output = + ctx->stage == MESA_SHADER_TESS_CTRL && info->tessfactors_are_def_in_all_invocs; - /* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */ - if (nir->info.stage == MESA_SHADER_FRAGMENT && - nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16) - type = ctx->ac.f16; + bool ps_need_output = ctx->stage == MESA_SHADER_FRAGMENT; - for (unsigned j = 0; j < 4; j++) { - ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, type, ""); - ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16; + if (ls_need_output || tcs_need_output || ps_need_output) { + for (unsigned i = 0; i < info->num_outputs; i++) { + LLVMTypeRef type = ctx->ac.f32; + + /* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */ + if (nir->info.stage == MESA_SHADER_FRAGMENT && + nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16) + type = ctx->ac.f16; + + for (unsigned j = 0; j < 4; j++) { + ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, type, ""); + ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16; + } } }