diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index da627f5a2f9..af9ead23c93 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -6079,8 +6079,7 @@ get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr) lod = get_ssa_temp_tex(ctx, instr->src[lod_index].ssa, a16); } - if (ctx->options->key.image_2d_view_of_3d && - dim == GLSL_SAMPLER_DIM_2D && !is_array) { + if (ctx->program->info.image_2d_view_of_3d && dim == GLSL_SAMPLER_DIM_2D && !is_array) { /* The hw can't bind a slice of a 3D image as a 2D image, because it * ignores BASE_ARRAY if the target is 3D. The workaround is to read * BASE_ARRAY and set it as the 3rd address operand for all 2D images. @@ -10720,8 +10719,8 @@ export_fs_mrt_z(isel_context* ctx) values[i] = Operand(v1); } - bool writes_mrt0_alpha = - ctx->options->key.ps.alpha_to_coverage_via_mrtz && (ctx->outputs.mask[FRAG_RESULT_DATA0] & 0x8); + bool writes_mrt0_alpha = ctx->program->info.ps.alpha_to_coverage_via_mrtz && + (ctx->outputs.mask[FRAG_RESULT_DATA0] & 0x8); /* Both stencil and sample mask only need 16-bits. */ if (!ctx->program->info.ps.writes_z && !writes_mrt0_alpha && @@ -11042,7 +11041,7 @@ create_fs_jump_to_epilog(isel_context* ctx) } } - Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->options->key.ps.epilog.pc)); + Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.ps.epilog.pc)); aco_ptr jump{create_instruction( aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + color_exports.size(), 0)}; @@ -11104,7 +11103,7 @@ create_fs_exports(isel_context* ctx) * require MRT0 to be written. Just copy MRT1 into MRT0. Skipping MRT1 exports seems to be * fine. */ - if (ctx->options->key.ps.epilog.mrt0_is_dual_src && !ctx->outputs.mask[FRAG_RESULT_DATA0] && + if (ctx->program->info.ps.epilog.mrt0_is_dual_src && !ctx->outputs.mask[FRAG_RESULT_DATA0] && ctx->outputs.mask[FRAG_RESULT_DATA1]) { u_foreach_bit (j, ctx->outputs.mask[FRAG_RESULT_DATA1]) { ctx->outputs.temps[FRAG_RESULT_DATA0 * 4u + j] = @@ -11126,9 +11125,9 @@ create_fs_exports(isel_context* ctx) out.slot = compacted_mrt_index; out.write_mask = ctx->outputs.mask[i]; - out.col_format = (ctx->options->key.ps.epilog.spi_shader_col_format >> (4 * idx)) & 0xf; - out.is_int8 = (ctx->options->key.ps.epilog.color_is_int8 >> idx) & 1; - out.is_int10 = (ctx->options->key.ps.epilog.color_is_int10 >> idx) & 1; + out.col_format = (ctx->program->info.ps.epilog.spi_shader_col_format >> (4 * idx)) & 0xf; + out.is_int8 = (ctx->program->info.ps.epilog.color_is_int8 >> idx) & 1; + out.is_int10 = (ctx->program->info.ps.epilog.color_is_int10 >> idx) & 1; out.enable_mrt_output_nan_fixup = (ctx->options->enable_mrt_output_nan_fixup >> idx) & 1; @@ -11147,7 +11146,7 @@ create_fs_exports(isel_context* ctx) } if (exported) { - if (ctx->options->gfx_level >= GFX11 && ctx->options->key.ps.epilog.mrt0_is_dual_src) { + if (ctx->options->gfx_level >= GFX11 && ctx->program->info.ps.epilog.mrt0_is_dual_src) { struct aco_export_mrt* mrt0 = mrts[0].enabled_channels ? &mrts[0] : NULL; struct aco_export_mrt* mrt1 = mrts[1].enabled_channels ? &mrts[1] : NULL; create_fs_dual_src_export_gfx11(ctx, mrt0, mrt1); @@ -11564,8 +11563,8 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const if (!ngg_gs && !tcs_skip_barrier) { sync_scope scope = ctx.stage == vertex_tess_control_hs && - ctx.options->key.tcs.tess_input_vertices == nir->info.tess.tcs_vertices_out && - program->wave_size % ctx.options->key.tcs.tess_input_vertices == 0 + ctx.program->info.tcs.tess_input_vertices == nir->info.tess.tcs_vertices_out && + program->wave_size % ctx.program->info.tcs.tess_input_vertices == 0 ? scope_subgroup : scope_workgroup; bld.barrier(aco_opcode::p_barrier, diff --git a/src/amd/compiler/aco_interface.cpp b/src/amd/compiler/aco_interface.cpp index 661ceb13081..41f8b0db75e 100644 --- a/src/amd/compiler/aco_interface.cpp +++ b/src/amd/compiler/aco_interface.cpp @@ -126,7 +126,7 @@ aco_postprocess_shader(const struct aco_compiler_options* options, validate(program.get()); /* Optimization */ - if (!options->key.optimisations_disabled) { + if (!options->optimisations_disabled) { if (!(aco::debug_flags & aco::DEBUG_NO_VN)) aco::value_numbering(program.get()); if (!(aco::debug_flags & aco::DEBUG_NO_OPT)) @@ -165,7 +165,7 @@ aco_postprocess_shader(const struct aco_compiler_options* options, aco_print_program(program.get(), stderr, live_vars, aco::print_live_vars | aco::print_kill); if (!info->is_trap_handler_shader) { - if (!options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_SCHED)) + if (!options->optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_SCHED)) aco::schedule_program(program.get(), live_vars); validate(program.get()); @@ -182,7 +182,7 @@ aco_postprocess_shader(const struct aco_compiler_options* options, validate(program.get()); /* Optimization */ - if (!options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_OPT)) { + if (!options->optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_OPT)) { aco::optimize_postRA(program.get()); validate(program.get()); } diff --git a/src/amd/compiler/aco_shader_info.h b/src/amd/compiler/aco_shader_info.h index d4c9a60fb3b..5b98501011b 100644 --- a/src/amd/compiler/aco_shader_info.h +++ b/src/amd/compiler/aco_shader_info.h @@ -63,11 +63,26 @@ struct aco_vs_prolog_info { gl_shader_stage next_stage; }; +struct aco_ps_epilog_info { + struct ac_arg inputs[8]; + struct ac_arg pc; + + uint32_t spi_shader_col_format; + + /* Bitmasks, each bit represents one of the 8 MRTs. */ + uint8_t color_is_int8; + uint8_t color_is_int10; + uint8_t enable_mrt_output_nan_fixup; + + bool mrt0_is_dual_src; +}; + struct aco_shader_info { uint8_t wave_size; bool is_ngg; bool has_ngg_culling; bool has_ngg_early_prim_export; + bool image_2d_view_of_3d; unsigned workgroup_size; struct { bool as_es; @@ -87,17 +102,22 @@ struct aco_shader_info { } gs; struct { uint32_t num_lds_blocks; + unsigned tess_input_vertices; } tcs; struct { bool as_es; } tes; struct { + struct aco_ps_epilog_info epilog; bool writes_z; bool writes_stencil; bool writes_sample_mask; bool has_epilog; uint32_t num_interp; unsigned spi_ps_input; + + /* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */ + bool alpha_to_coverage_via_mrtz; } ps; struct { uint8_t subgroup_size; @@ -114,38 +134,7 @@ enum aco_compiler_debug_level { ACO_COMPILER_DEBUG_LEVEL_ERROR, }; -struct aco_ps_epilog_info { - struct ac_arg inputs[8]; - struct ac_arg pc; - - uint32_t spi_shader_col_format; - - /* Bitmasks, each bit represents one of the 8 MRTs. */ - uint8_t color_is_int8; - uint8_t color_is_int10; - uint8_t enable_mrt_output_nan_fixup; - - bool mrt0_is_dual_src; -}; - -struct aco_stage_input { - uint32_t optimisations_disabled : 1; - uint32_t image_2d_view_of_3d : 1; - - struct { - unsigned tess_input_vertices; - } tcs; - - struct { - struct aco_ps_epilog_info epilog; - - /* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */ - bool alpha_to_coverage_via_mrtz; - } ps; -}; - struct aco_compiler_options { - struct aco_stage_input key; bool robust_buffer_access; bool dump_shader; bool dump_preoptir; @@ -153,6 +142,7 @@ struct aco_compiler_options { bool record_stats; bool has_ls_vgpr_init_bug; bool load_grid_size_from_user_sgpr; + bool optimisations_disabled; uint8_t enable_mrt_output_nan_fixup; bool wgp_mode; enum radeon_family family; diff --git a/src/amd/vulkan/radv_aco_shader_info.h b/src/amd/vulkan/radv_aco_shader_info.h index 32bb76d8fe7..ccfa6088eb0 100644 --- a/src/amd/vulkan/radv_aco_shader_info.h +++ b/src/amd/vulkan/radv_aco_shader_info.h @@ -34,11 +34,16 @@ #define ASSIGN_FIELD(x) aco_info->x = radv->x #define ASSIGN_FIELD_CP(x) memcpy(&aco_info->x, &radv->x, sizeof(radv->x)) +static inline void radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco_info, + const struct radv_ps_epilog_key *radv, + const struct radv_shader_args *radv_args); + static inline void -radv_aco_convert_shader_info(struct aco_shader_info *aco_info, - const struct radv_shader_info *radv, - const struct radv_shader_args *radv_args) +radv_aco_convert_shader_info(struct aco_shader_info *aco_info, const struct radv_shader_info *radv, + const struct radv_shader_args *radv_args, + const struct radv_pipeline_key *radv_key) { + radv_aco_convert_ps_epilog_key(&aco_info->ps.epilog, &radv_key->ps.epilog, radv_args); ASSIGN_FIELD(wave_size); ASSIGN_FIELD(is_ngg); ASSIGN_FIELD(has_ngg_culling); @@ -68,6 +73,9 @@ radv_aco_convert_shader_info(struct aco_shader_info *aco_info, ASSIGN_FIELD(cs.uses_full_subgroups); aco_info->gfx9_gs_ring_lds_size = radv->gs_ring_info.lds_size; aco_info->is_trap_handler_shader = radv_args->type == RADV_SHADER_TYPE_TRAP_HANDLER; + aco_info->tcs.tess_input_vertices = radv_key->tcs.tess_input_vertices; + aco_info->ps.alpha_to_coverage_via_mrtz = radv_key->ps.alpha_to_coverage_via_mrtz; + aco_info->image_2d_view_of_3d = radv_key->image_2d_view_of_3d; } #define ASSIGN_VS_STATE_FIELD(x) aco_info->state.x = radv->state->x @@ -107,23 +115,11 @@ radv_aco_convert_ps_epilog_key(struct aco_ps_epilog_info *aco_info, aco_info->pc = radv_args->ps_epilog_pc; } -static inline void -radv_aco_convert_pipe_key(struct aco_stage_input *aco_info, const struct radv_pipeline_key *radv, - const struct radv_shader_args *radv_args) -{ - radv_aco_convert_ps_epilog_key(&aco_info->ps.epilog, &radv->ps.epilog, radv_args); - ASSIGN_FIELD(optimisations_disabled); - ASSIGN_FIELD(image_2d_view_of_3d); - ASSIGN_FIELD(tcs.tess_input_vertices); - ASSIGN_FIELD(ps.alpha_to_coverage_via_mrtz); -} - static inline void radv_aco_convert_opts(struct aco_compiler_options *aco_info, const struct radv_nir_compiler_options *radv, const struct radv_shader_args *radv_args) { - radv_aco_convert_pipe_key(&aco_info->key, &radv->key, radv_args); ASSIGN_FIELD(robust_buffer_access); ASSIGN_FIELD(dump_shader); ASSIGN_FIELD(dump_preoptir); @@ -139,6 +135,7 @@ radv_aco_convert_opts(struct aco_compiler_options *aco_info, ASSIGN_FIELD(debug.private_data); ASSIGN_FIELD(debug.private_data); aco_info->load_grid_size_from_user_sgpr = radv_args->load_grid_size_from_user_sgpr; + aco_info->optimisations_disabled = radv->key.optimisations_disabled; } #undef ASSIGN_VS_STATE_FIELD #undef ASSIGN_VS_STATE_FIELD_CP diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c index dbbb797bc0b..0129516cee5 100644 --- a/src/amd/vulkan/radv_shader.c +++ b/src/amd/vulkan/radv_shader.c @@ -2213,7 +2213,7 @@ shader_compile(struct radv_device *device, struct nir_shader *const *shaders, in struct aco_shader_info ac_info; struct aco_compiler_options ac_opts; radv_aco_convert_opts(&ac_opts, options, args); - radv_aco_convert_shader_info(&ac_info, info, args); + radv_aco_convert_shader_info(&ac_info, info, args, &options->key); aco_compile_shader(&ac_opts, &ac_info, shader_count, shaders, &args->ac, &radv_aco_build_shader_binary, (void **)&binary); } @@ -2373,7 +2373,7 @@ radv_create_rt_prolog(struct radv_device *device) struct radv_shader_binary *binary = NULL; struct aco_shader_info ac_info; struct aco_compiler_options ac_opts; - radv_aco_convert_shader_info(&ac_info, &info, &in_args); + radv_aco_convert_shader_info(&ac_info, &info, &in_args, &options.key); radv_aco_convert_opts(&ac_opts, &options, &in_args); aco_compile_rt_prolog(&ac_opts, &ac_info, &in_args.ac, &out_args.ac, &radv_aco_build_shader_binary, (void **)&binary); @@ -2441,7 +2441,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke struct aco_shader_info ac_info; struct aco_vs_prolog_info ac_prolog_info; struct aco_compiler_options ac_opts; - radv_aco_convert_shader_info(&ac_info, &info, &args); + radv_aco_convert_shader_info(&ac_info, &info, &args, &options.key); radv_aco_convert_opts(&ac_opts, &options, &args); radv_aco_convert_vs_prolog_key(&ac_prolog_info, key, &args); aco_compile_vs_prolog(&ac_opts, &ac_info, &ac_prolog_info, &args.ac, &radv_aco_build_shader_part, @@ -2494,7 +2494,7 @@ radv_create_ps_epilog(struct radv_device *device, const struct radv_ps_epilog_ke struct aco_shader_info ac_info; struct aco_ps_epilog_info ac_epilog_info; struct aco_compiler_options ac_opts; - radv_aco_convert_shader_info(&ac_info, &info, &args); + radv_aco_convert_shader_info(&ac_info, &info, &args, &options.key); radv_aco_convert_opts(&ac_opts, &options, &args); radv_aco_convert_ps_epilog_key(&ac_epilog_info, key, &args); aco_compile_ps_epilog(&ac_opts, &ac_info, &ac_epilog_info, &args.ac, &radv_aco_build_shader_part,