mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2025-12-28 21:10:12 +01:00
aco: only insert fp mode when needed
Reviewed-by: Rhys Perry <pendingchaos02@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/35746>
This commit is contained in:
parent
46c1bd1147
commit
d45f375a9d
5 changed files with 19 additions and 7 deletions
|
|
@ -157,7 +157,8 @@ aco_postprocess_shader(const struct aco_compiler_options* options,
|
|||
if (!options->optimisations_disabled && !(debug_flags & DEBUG_NO_SCHED_ILP))
|
||||
schedule_ilp(program.get());
|
||||
|
||||
insert_fp_mode(program.get());
|
||||
if (program->needs_fp_mode_insertion)
|
||||
insert_fp_mode(program.get());
|
||||
|
||||
insert_waitcnt(program.get());
|
||||
insert_NOPs(program.get());
|
||||
|
|
|
|||
|
|
@ -233,6 +233,7 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
|
|||
program->next_fp_mode.denorm32 = 0;
|
||||
program->next_fp_mode.round16_64 = fp_round_ne;
|
||||
program->next_fp_mode.round32 = fp_round_ne;
|
||||
program->needs_fp_mode_insertion = false;
|
||||
}
|
||||
|
||||
bool
|
||||
|
|
|
|||
|
|
@ -2142,6 +2142,7 @@ public:
|
|||
Stage stage;
|
||||
bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
|
||||
bool needs_wqm = false; /* there exists a p_wqm instruction */
|
||||
bool needs_fp_mode_insertion = false; /* insert_fp_mode should be run */
|
||||
bool has_smem_buffer_or_global_loads = false;
|
||||
bool has_pops_overlapped_waves_wait = false;
|
||||
bool has_color_exports = false;
|
||||
|
|
|
|||
|
|
@ -1429,6 +1429,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
|
|||
return select_program_rt(ctx, shader_count, shaders, args);
|
||||
|
||||
if (shader_count >= 2) {
|
||||
program->needs_fp_mode_insertion = true;
|
||||
select_program_merged(ctx, shader_count, shaders);
|
||||
} else {
|
||||
bool need_barrier = false, check_merged_wave_info = false, endif_merged_wave_info = false;
|
||||
|
|
@ -1437,6 +1438,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
|
|||
/* Handle separate compilation of VS+TCS and {VS,TES}+GS on GFX9+. */
|
||||
if (ctx.program->info.merged_shader_compiled_separately) {
|
||||
assert(ctx.program->gfx_level >= GFX9);
|
||||
program->needs_fp_mode_insertion = true;
|
||||
if (ctx.stage.sw == SWStage::VS || ctx.stage.sw == SWStage::TES) {
|
||||
check_merged_wave_info = endif_merged_wave_info = true;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -2510,9 +2510,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
Temp src = get_alu_src(ctx, instr->src[0]);
|
||||
if (instr->op == nir_op_f2f16_rtne && ctx->block->fp_mode.round16_64 != fp_round_ne) {
|
||||
/* We emit s_round_mode/s_setreg_imm32 in lower_to_hw_instr to
|
||||
* keep value numbering and the scheduler simpler.
|
||||
/* We emit s_round_mode/s_setreg_imm32 in insert_fp_mode to
|
||||
* keep value numbering and scheduling simpler.
|
||||
*/
|
||||
ctx->program->needs_fp_mode_insertion = true;
|
||||
if (dst.regClass() == v2b)
|
||||
bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, Definition(dst), src);
|
||||
else
|
||||
|
|
@ -2600,6 +2601,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
}
|
||||
|
||||
ctx->program->needs_fp_mode_insertion |= instr->op == nir_op_f2e4m3fn_satfn;
|
||||
|
||||
aco_opcode opcode = instr->op == nir_op_f2e4m3fn || instr->op == nir_op_f2e4m3fn_sat
|
||||
? aco_opcode::v_cvt_pk_fp8_f32
|
||||
: instr->op == nir_op_f2e4m3fn_satfn ? aco_opcode::p_v_cvt_pk_fp8_f32_ovfl
|
||||
|
|
@ -3215,10 +3218,12 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
Temp src = get_alu_src(ctx, instr->src[0]);
|
||||
if (dst.regClass() == v1) {
|
||||
Temp f16;
|
||||
if (ctx->block->fp_mode.round16_64 != fp_round_ne)
|
||||
if (ctx->block->fp_mode.round16_64 != fp_round_ne) {
|
||||
ctx->program->needs_fp_mode_insertion = true;
|
||||
f16 = bld.vop1(aco_opcode::p_v_cvt_f16_f32_rtne, bld.def(v2b), src);
|
||||
else
|
||||
} else {
|
||||
f16 = bld.vop1(aco_opcode::v_cvt_f16_f32, bld.def(v2b), src);
|
||||
}
|
||||
|
||||
if (ctx->block->fp_mode.denorm16_64 != fp_denorm_keep) {
|
||||
bld.vop1(aco_opcode::v_cvt_f32_f16, Definition(dst), f16);
|
||||
|
|
@ -3254,10 +3259,12 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
|
|||
}
|
||||
} else if (dst.regClass() == s1) {
|
||||
Temp f16;
|
||||
if (ctx->block->fp_mode.round16_64 != fp_round_ne)
|
||||
if (ctx->block->fp_mode.round16_64 != fp_round_ne) {
|
||||
ctx->program->needs_fp_mode_insertion = true;
|
||||
f16 = bld.sop1(aco_opcode::p_s_cvt_f16_f32_rtne, bld.def(s1), src);
|
||||
else
|
||||
} else {
|
||||
f16 = bld.sop1(aco_opcode::s_cvt_f16_f32, bld.def(s1), src);
|
||||
}
|
||||
|
||||
if (ctx->block->fp_mode.denorm16_64 != fp_denorm_keep) {
|
||||
bld.sop1(aco_opcode::s_cvt_f32_f16, Definition(dst), f16);
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue