radeonsi: only init llvm output when needed.

These are the cases that still use nir_store_output.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Signed-off-by: Qiang Yu <yuq825@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21435>
This commit is contained in:
Qiang Yu 2022-12-26 10:03:59 +08:00 committed by Marge Bot
parent 87ec94f6aa
commit ee9ccd7ac5

View file

@ -1000,23 +1000,28 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
ctx->abi.disable_aniso_single_level = true;
ctx->abi.conformant_trunc_coord = ctx->screen->info.conformant_trunc_coord;
unsigned num_outputs = info->num_outputs;
/* need extra output to hold primitive id added by nir lower */
if (ctx->stage <= MESA_SHADER_GEOMETRY &&
ctx->shader->key.ge.mono.u.vs_export_prim_id)
num_outputs++;
bool ls_need_output =
ctx->stage == MESA_SHADER_VERTEX && shader->key.ge.as_ls &&
shader->key.ge.opt.same_patch_vertices;
for (unsigned i = 0; i < num_outputs; i++) {
LLVMTypeRef type = ctx->ac.f32;
bool tcs_need_output =
ctx->stage == MESA_SHADER_TESS_CTRL && info->tessfactors_are_def_in_all_invocs;
/* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
type = ctx->ac.f16;
bool ps_need_output = ctx->stage == MESA_SHADER_FRAGMENT;
for (unsigned j = 0; j < 4; j++) {
ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, type, "");
ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16;
if (ls_need_output || tcs_need_output || ps_need_output) {
for (unsigned i = 0; i < info->num_outputs; i++) {
LLVMTypeRef type = ctx->ac.f32;
/* Only FS uses unpacked f16. Other stages pack 16-bit outputs into low and high bits of f32. */
if (nir->info.stage == MESA_SHADER_FRAGMENT &&
nir_alu_type_get_type_size(ctx->shader->selector->info.output_type[i]) == 16)
type = ctx->ac.f16;
for (unsigned j = 0; j < 4; j++) {
ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, type, "");
ctx->abi.is_16bit[i * 4 + j] = type == ctx->ac.f16;
}
}
}