radv,aco: decouple shader_info/options from radv_shader_args

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13287>
This commit is contained in:
Samuel Pitoiset 2021-10-08 16:14:15 +02:00 committed by Marge Bot
parent 1429feaf29
commit 8ec6824335
11 changed files with 445 additions and 388 deletions

View file

@ -5004,7 +5004,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
nir_src offset = *nir_get_io_offset_src(instr);
if (ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->args->shader_info->vs.dynamic_inputs) {
if (ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->program->info->vs.dynamic_inputs) {
if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
isel_err(offset.ssa->parent_instr,
"Unimplemented non-zero nir_intrinsic_load_input offset");
@ -5530,12 +5530,12 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr)
nir_const_value* index_cv = nir_src_as_const_value(instr->src[0]);
if (index_cv && instr->dest.ssa.bit_size == 32) {
struct radv_userdata_info *loc =
&ctx->args->shader_info->user_sgprs_locs.shader_data[AC_UD_INLINE_PUSH_CONSTANTS];
const struct radv_userdata_info *loc =
&ctx->program->info->user_sgprs_locs.shader_data[AC_UD_INLINE_PUSH_CONSTANTS];
unsigned start = (offset + index_cv->u32) / 4u;
unsigned num_inline_push_consts = loc->sgpr_idx != -1 ? loc->num_sgprs : 0;
start -= ctx->args->shader_info->min_push_constant_used / 4;
start -= ctx->program->info->min_push_constant_used / 4;
if (start + count <= num_inline_push_consts) {
std::array<Temp, NIR_MAX_VEC_COMPONENTS> elems;
aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
@ -8841,7 +8841,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
ctx->shader->info.stage == MESA_SHADER_TESS_EVAL);
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
bld.copy(Definition(dst), Operand::c32(ctx->args->options->key.tcs.tess_input_vertices));
bld.copy(Definition(dst), Operand::c32(ctx->options->key.tcs.tess_input_vertices));
break;
}
case nir_intrinsic_emit_vertex_with_counter: {
@ -11574,9 +11574,11 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt)
void
select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, const struct radv_shader_args* args)
ac_shader_config* config, const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args)
{
isel_context ctx = setup_isel_context(program, shader_count, shaders, config, args, false);
isel_context ctx = setup_isel_context(program, shader_count, shaders, config, options, info, args, false);
if_context ic_merged_wave_info;
bool ngg_gs = ctx.stage.hw == HWStage::NGG && ctx.stage.has(SWStage::GS);
@ -11591,12 +11593,12 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
Pseudo_instruction* startpgm = add_startpgm(&ctx);
append_logical_start(ctx.block);
if (unlikely(args->options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs))
if (unlikely(ctx.options->has_ls_vgpr_init_bug && ctx.stage == vertex_tess_control_hs))
fix_ls_vgpr_init_bug(&ctx, startpgm);
split_arguments(&ctx, startpgm);
if (!args->shader_info->vs.has_prolog &&
if (!info->vs.has_prolog &&
(program->stage.has(SWStage::VS) || program->stage.has(SWStage::TES))) {
Builder(ctx.program, ctx.block).sopp(aco_opcode::s_setprio, -1u, 0x3u);
}
@ -11693,9 +11695,11 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
void
select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args)
{
isel_context ctx = setup_isel_context(program, 1, &gs_shader, config, args, true);
isel_context ctx = setup_isel_context(program, 1, &gs_shader, config, options, info, args, true);
ctx.block->fp_mode = program->next_fp_mode;
@ -11708,7 +11712,7 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
program->private_segment_buffer, Operand::c32(RING_GSVS_VS * 16u));
Operand stream_id = Operand::zero();
if (args->shader_info->so.num_outputs)
if (program->info->so.num_outputs)
stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
get_arg(&ctx, ctx.args->ac.streamout_config), Operand::c32(0x20018u));
@ -11721,8 +11725,8 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
if (stream_id.isConstant() && stream != stream_id.constantValue())
continue;
unsigned num_components = args->shader_info->gs.num_stream_output_components[stream];
if (stream > 0 && (!num_components || !args->shader_info->so.num_outputs))
unsigned num_components = program->info->gs.num_stream_output_components[stream];
if (stream > 0 && (!num_components || !program->info->so.num_outputs))
continue;
memset(ctx.outputs.mask, 0, sizeof(ctx.outputs.mask));
@ -11737,17 +11741,17 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
unsigned offset = 0;
for (unsigned i = 0; i <= VARYING_SLOT_VAR31; ++i) {
if (args->shader_info->gs.output_streams[i] != stream)
if (program->info->gs.output_streams[i] != stream)
continue;
unsigned output_usage_mask = args->shader_info->gs.output_usage_mask[i];
unsigned output_usage_mask = program->info->gs.output_usage_mask[i];
unsigned length = util_last_bit(output_usage_mask);
for (unsigned j = 0; j < length; ++j) {
if (!(output_usage_mask & (1 << j)))
continue;
Temp val = bld.tmp(v1);
unsigned const_offset = offset * args->shader_info->gs.vertices_out * 16 * 4;
unsigned const_offset = offset * program->info->gs.vertices_out * 16 * 4;
load_vmem_mubuf(&ctx, val, gsvs_ring, vtx_offset, Temp(), const_offset, 4, 1, 0u, true,
true, true);
@ -11758,7 +11762,7 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
}
}
if (args->shader_info->so.num_outputs) {
if (program->info->so.num_outputs) {
emit_streamout(&ctx, stream);
bld.reset(ctx.block);
}
@ -11790,17 +11794,19 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
void
select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args)
{
assert(args->options->chip_class == GFX8);
assert(options->chip_class == GFX8);
init_program(program, compute_cs, args->shader_info, args->options->chip_class,
args->options->family, args->options->wgp_mode, config);
init_program(program, compute_cs, info, options->chip_class,
options->family, options->wgp_mode, config);
isel_context ctx = {};
ctx.program = program;
ctx.args = args;
ctx.options = args->options;
ctx.options = options;
ctx.stage = program->stage;
ctx.block = ctx.program->create_and_insert_block();
@ -11952,16 +11958,18 @@ calc_nontrivial_instance_id(Builder& bld, const struct radv_shader_args* args, u
void
select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args, unsigned* num_preserved_sgprs)
{
assert(key->num_attributes > 0);
/* This should be enough for any shader/stage. */
unsigned max_user_sgprs = args->options->chip_class >= GFX9 ? 32 : 16;
unsigned max_user_sgprs = options->chip_class >= GFX9 ? 32 : 16;
*num_preserved_sgprs = max_user_sgprs + 14;
init_program(program, compute_cs, args->shader_info, args->options->chip_class,
args->options->family, args->options->wgp_mode, config);
init_program(program, compute_cs, info, options->chip_class,
options->family, options->wgp_mode, config);
Block* block = program->create_and_insert_block();
block->kind = block_kind_top_level;
@ -12001,7 +12009,7 @@ select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key, ac_shad
bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers, s1),
get_arg_fixed(args, args->ac.vertex_buffers));
bld.sop1(aco_opcode::s_mov_b32, Definition(vertex_buffers.advance(4), s1),
Operand::c32((unsigned)args->options->address32_hi));
Operand::c32((unsigned)options->address32_hi));
/* calculate vgpr requirements */
unsigned num_vgprs = attributes_start.reg() - 256;

View file

@ -116,6 +116,8 @@ void cleanup_context(isel_context* ctx);
isel_context setup_isel_context(Program* program, unsigned shader_count,
struct nir_shader* const* shaders, ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args, bool is_gs_copy_shader);
} // namespace aco

View file

@ -273,7 +273,7 @@ setup_vs_variables(isel_context* ctx, nir_shader* nir)
/* TODO: NGG streamout */
if (ctx->stage.hw == HWStage::NGG)
assert(!ctx->args->shader_info->so.num_outputs);
assert(!ctx->program->info->so.num_outputs);
}
if (ctx->stage == vertex_ngg) {
@ -301,23 +301,23 @@ setup_gs_variables(isel_context* ctx, nir_shader* nir)
void
setup_tcs_info(isel_context* ctx, nir_shader* nir, nir_shader* vs)
{
ctx->tcs_in_out_eq = ctx->args->shader_info->vs.tcs_in_out_eq;
ctx->tcs_temp_only_inputs = ctx->args->shader_info->vs.tcs_temp_only_input_mask;
ctx->tcs_num_patches = ctx->args->shader_info->num_tess_patches;
ctx->program->config->lds_size = ctx->args->shader_info->tcs.num_lds_blocks;
ctx->tcs_in_out_eq = ctx->program->info->vs.tcs_in_out_eq;
ctx->tcs_temp_only_inputs = ctx->program->info->vs.tcs_temp_only_input_mask;
ctx->tcs_num_patches = ctx->program->info->num_tess_patches;
ctx->program->config->lds_size = ctx->program->info->tcs.num_lds_blocks;
}
void
setup_tes_variables(isel_context* ctx, nir_shader* nir)
{
ctx->tcs_num_patches = ctx->args->shader_info->num_tess_patches;
ctx->tcs_num_patches = ctx->program->info->num_tess_patches;
if (ctx->stage == tess_eval_vs || ctx->stage == tess_eval_ngg) {
setup_vs_output_info(ctx, nir, &ctx->program->info->tes.outinfo);
/* TODO: NGG streamout */
if (ctx->stage.hw == HWStage::NGG)
assert(!ctx->args->shader_info->so.num_outputs);
assert(!ctx->program->info->so.num_outputs);
}
if (ctx->stage == tess_eval_ngg) {
@ -388,9 +388,9 @@ init_context(isel_context* ctx, nir_shader* shader)
ctx->range_ht = _mesa_pointer_hash_table_create(NULL);
ctx->ub_config.min_subgroup_size = 64;
ctx->ub_config.max_subgroup_size = 64;
if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && ctx->args->shader_info->cs.subgroup_size) {
ctx->ub_config.min_subgroup_size = ctx->args->shader_info->cs.subgroup_size;
ctx->ub_config.max_subgroup_size = ctx->args->shader_info->cs.subgroup_size;
if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && ctx->program->info->cs.subgroup_size) {
ctx->ub_config.min_subgroup_size = ctx->program->info->cs.subgroup_size;
ctx->ub_config.max_subgroup_size = ctx->program->info->cs.subgroup_size;
}
ctx->ub_config.max_workgroup_invocations = 2048;
ctx->ub_config.max_workgroup_count[0] = 65535;
@ -797,8 +797,8 @@ init_context(isel_context* ctx, nir_shader* shader)
}
}
ctx->program->config->spi_ps_input_ena = ctx->args->shader_info->ps.spi_ps_input;
ctx->program->config->spi_ps_input_addr = ctx->args->shader_info->ps.spi_ps_input;
ctx->program->config->spi_ps_input_ena = ctx->program->info->ps.spi_ps_input;
ctx->program->config->spi_ps_input_addr = ctx->program->info->ps.spi_ps_input;
ctx->cf_info.nir_to_aco = std::move(nir_to_aco);
@ -819,7 +819,9 @@ cleanup_context(isel_context* ctx)
isel_context
setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, const struct radv_shader_args* args, bool is_gs_copy_shader)
ac_shader_config* config, const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args, bool is_gs_copy_shader)
{
SWStage sw_stage = SWStage::None;
for (unsigned i = 0; i < shader_count; i++) {
@ -835,12 +837,12 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
default: unreachable("Shader stage not implemented");
}
}
bool gfx9_plus = args->options->chip_class >= GFX9;
bool ngg = args->shader_info->is_ngg && args->options->chip_class >= GFX10;
bool gfx9_plus = options->chip_class >= GFX9;
bool ngg = info->is_ngg && options->chip_class >= GFX10;
HWStage hw_stage{};
if (sw_stage == SWStage::VS && args->shader_info->vs.as_es && !ngg)
if (sw_stage == SWStage::VS && info->vs.as_es && !ngg)
hw_stage = HWStage::ES;
else if (sw_stage == SWStage::VS && !args->shader_info->vs.as_ls && !ngg)
else if (sw_stage == SWStage::VS && !info->vs.as_ls && !ngg)
hw_stage = HWStage::VS;
else if (sw_stage == SWStage::VS && ngg)
hw_stage = HWStage::NGG; /* GFX10/NGG: VS without GS uses the HW GS stage */
@ -856,17 +858,17 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
hw_stage = HWStage::GS; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */
else if (sw_stage == SWStage::VS_GS && ngg)
hw_stage = HWStage::NGG; /* GFX10+: VS+GS merged into an NGG GS */
else if (sw_stage == SWStage::VS && args->shader_info->vs.as_ls)
else if (sw_stage == SWStage::VS && info->vs.as_ls)
hw_stage = HWStage::LS; /* GFX6-8: VS is a Local Shader, when tessellation is used */
else if (sw_stage == SWStage::TCS)
hw_stage = HWStage::HS; /* GFX6-8: TCS is a Hull Shader */
else if (sw_stage == SWStage::VS_TCS)
hw_stage = HWStage::HS; /* GFX9-10: VS+TCS merged into a Hull Shader */
else if (sw_stage == SWStage::TES && !args->shader_info->tes.as_es && !ngg)
else if (sw_stage == SWStage::TES && !info->tes.as_es && !ngg)
hw_stage = HWStage::VS; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
else if (sw_stage == SWStage::TES && !args->shader_info->tes.as_es && ngg)
else if (sw_stage == SWStage::TES && !info->tes.as_es && ngg)
hw_stage = HWStage::NGG; /* GFX10/NGG: TES without GS */
else if (sw_stage == SWStage::TES && args->shader_info->tes.as_es && !ngg)
else if (sw_stage == SWStage::TES && info->tes.as_es && !ngg)
hw_stage = HWStage::ES; /* GFX6-8: TES is an Export Shader */
else if (sw_stage == SWStage::TES_GS && gfx9_plus && !ngg)
hw_stage = HWStage::GS; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
@ -875,16 +877,16 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
else
unreachable("Shader stage not implemented");
init_program(program, Stage{hw_stage, sw_stage}, args->shader_info, args->options->chip_class,
args->options->family, args->options->wgp_mode, config);
init_program(program, Stage{hw_stage, sw_stage}, info, options->chip_class,
options->family, options->wgp_mode, config);
isel_context ctx = {};
ctx.program = program;
ctx.args = args;
ctx.options = args->options;
ctx.options = options;
ctx.stage = program->stage;
program->workgroup_size = args->shader_info->workgroup_size;
program->workgroup_size = program->info->workgroup_size;
assert(program->workgroup_size);
if (ctx.stage == tess_control_hs)
@ -897,7 +899,7 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
unsigned scratch_size = 0;
if (program->stage == gs_copy_vs) {
assert(shader_count == 1);
setup_vs_output_info(&ctx, shaders[0], &args->shader_info->vs.outinfo);
setup_vs_output_info(&ctx, shaders[0], &program->info->vs.outinfo);
} else {
for (unsigned i = 0; i < shader_count; i++) {
nir_shader* nir = shaders[i];

View file

@ -74,29 +74,32 @@ validate(aco::Program* program)
}
void
aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
struct radv_shader_binary** binary, const struct radv_shader_args* args)
aco_compile_shader(const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
unsigned shader_count, struct nir_shader* const* shaders,
const struct radv_shader_args *args,
struct radv_shader_binary** binary)
{
aco::init();
ac_shader_config config = {0};
std::unique_ptr<aco::Program> program{new aco::Program};
program->collect_statistics = args->options->record_stats;
program->collect_statistics = options->record_stats;
if (program->collect_statistics)
memset(program->statistics, 0, sizeof(program->statistics));
program->debug.func = args->options->debug.func;
program->debug.private_data = args->options->debug.private_data;
program->debug.func = options->debug.func;
program->debug.private_data = options->debug.private_data;
/* Instruction Selection */
if (args->is_gs_copy_shader)
aco::select_gs_copy_shader(program.get(), shaders[0], &config, args);
aco::select_gs_copy_shader(program.get(), shaders[0], &config, options, info, args);
else if (args->is_trap_handler_shader)
aco::select_trap_handler_shader(program.get(), shaders[0], &config, args);
aco::select_trap_handler_shader(program.get(), shaders[0], &config, options, info, args);
else
aco::select_program(program.get(), shader_count, shaders, &config, args);
if (args->options->dump_preoptir)
aco::select_program(program.get(), shader_count, shaders, &config, options, info, args);
if (options->dump_preoptir)
aco_print_program(program.get(), stderr);
aco::live live_vars;
@ -107,7 +110,7 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
validate(program.get());
/* Optimization */
if (!args->options->key.optimisations_disabled) {
if (!options->key.optimisations_disabled) {
if (!(aco::debug_flags & aco::DEBUG_NO_VN))
aco::value_numbering(program.get());
if (!(aco::debug_flags & aco::DEBUG_NO_OPT))
@ -125,7 +128,7 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
}
std::string llvm_ir;
if (args->options->record_ir) {
if (options->record_ir) {
char* data = NULL;
size_t size = 0;
u_memstream mem;
@ -143,11 +146,11 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
if (program->collect_statistics)
aco::collect_presched_stats(program.get());
if ((aco::debug_flags & aco::DEBUG_LIVE_INFO) && args->options->dump_shader)
if ((aco::debug_flags & aco::DEBUG_LIVE_INFO) && options->dump_shader)
aco_print_program(program.get(), stderr, live_vars, aco::print_live_vars | aco::print_kill);
if (!args->is_trap_handler_shader) {
if (!args->options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_SCHED))
if (!options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_SCHED))
aco::schedule_program(program.get(), live_vars);
validate(program.get());
@ -157,14 +160,14 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
if (aco::validate_ra(program.get())) {
aco_print_program(program.get(), stderr);
abort();
} else if (args->options->dump_shader) {
} else if (options->dump_shader) {
aco_print_program(program.get(), stderr);
}
validate(program.get());
/* Optimization */
if (!args->options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_OPT)) {
if (!options->key.optimisations_disabled && !(aco::debug_flags & aco::DEBUG_NO_OPT)) {
aco::optimize_postRA(program.get());
validate(program.get());
}
@ -192,7 +195,7 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
if (program->collect_statistics)
aco::collect_postasm_stats(program.get(), code);
bool get_disasm = args->options->dump_shader || args->options->record_ir;
bool get_disasm = options->dump_shader || options->record_ir;
size_t size = llvm_ir.size();
@ -266,8 +269,11 @@ aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
}
void
aco_compile_vs_prolog(const struct radv_vs_prolog_key* key, struct radv_prolog_binary** binary,
const struct radv_shader_args* args)
aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_vs_prolog_key* key,
const struct radv_shader_args* args,
struct radv_prolog_binary** binary)
{
aco::init();
@ -280,10 +286,10 @@ aco_compile_vs_prolog(const struct radv_vs_prolog_key* key, struct radv_prolog_b
/* create IR */
unsigned num_preserved_sgprs;
aco::select_vs_prolog(program.get(), key, &config, args, &num_preserved_sgprs);
aco::select_vs_prolog(program.get(), key, &config, options, info, args, &num_preserved_sgprs);
aco::insert_NOPs(program.get());
if (args->options->dump_shader)
if (options->dump_shader)
aco_print_program(program.get(), stderr);
/* assembly */
@ -291,7 +297,7 @@ aco_compile_vs_prolog(const struct radv_vs_prolog_key* key, struct radv_prolog_b
code.reserve(align(program->blocks[0].instructions.size() * 2, 16));
unsigned exec_size = aco::emit_program(program.get(), code);
if (args->options->dump_shader) {
if (options->dump_shader) {
aco::print_asm(program.get(), code, exec_size / 4u, stderr);
fprintf(stderr, "\n");
}

View file

@ -41,11 +41,17 @@ struct aco_compiler_statistic_info {
extern const unsigned aco_num_statistics;
extern const struct aco_compiler_statistic_info* aco_statistic_infos;
void aco_compile_shader(unsigned shader_count, struct nir_shader* const* shaders,
struct radv_shader_binary** binary, const struct radv_shader_args* args);
void aco_compile_shader(const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
unsigned shader_count, struct nir_shader* const* shaders,
const struct radv_shader_args *args,
struct radv_shader_binary** binary);
void aco_compile_vs_prolog(const struct radv_vs_prolog_key* key, struct radv_prolog_binary** binary,
const struct radv_shader_args* args);
void aco_compile_vs_prolog(const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_vs_prolog_key* key,
const struct radv_shader_args* args,
struct radv_prolog_binary** binary);
#ifdef __cplusplus
}

View file

@ -2143,13 +2143,23 @@ void init_program(Program* program, Stage stage, const struct radv_shader_info*
ac_shader_config* config);
void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
ac_shader_config* config, const struct radv_shader_args* args);
ac_shader_config* config, const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args);
void select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args);
void select_trap_handler_shader(Program* program, struct nir_shader* shader,
ac_shader_config* config, const struct radv_shader_args* args);
ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args);
void select_vs_prolog(Program* program, const struct radv_vs_prolog_key* key,
ac_shader_config* config, const struct radv_shader_args* args,
ac_shader_config* config,
const struct radv_nir_compiler_options* options,
const struct radv_shader_info* info,
const struct radv_shader_args* args,
unsigned* num_preserved_sgprs);
void lower_phis(Program* program);

View file

@ -44,6 +44,8 @@ struct radv_shader_context {
struct ac_llvm_context ac;
const struct nir_shader *shader;
struct ac_shader_abi abi;
const struct radv_nir_compiler_options *options;
struct radv_shader_info *shader_info;
const struct radv_shader_args *args;
gl_shader_stage stage;
@ -111,8 +113,8 @@ create_llvm_function(struct ac_llvm_context *ctx, LLVMModuleRef module, LLVMBuil
static void
load_descriptor_sets(struct radv_shader_context *ctx)
{
struct radv_userdata_locations *user_sgprs_locs = &ctx->args->shader_info->user_sgprs_locs;
uint32_t mask = ctx->args->shader_info->desc_set_used_mask;
struct radv_userdata_locations *user_sgprs_locs = &ctx->shader_info->user_sgprs_locs;
uint32_t mask = ctx->shader_info->desc_set_used_mask;
if (user_sgprs_locs->shader_data[AC_UD_INDIRECT_DESCRIPTOR_SETS].sgpr_idx != -1) {
LLVMValueRef desc_sets = ac_get_arg(&ctx->ac, ctx->args->descriptor_sets[0]);
@ -168,7 +170,7 @@ static void
create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has_previous_stage)
{
if (ctx->ac.chip_class >= GFX10) {
if (is_pre_gs_stage(stage) && ctx->args->shader_info->is_ngg) {
if (is_pre_gs_stage(stage) && ctx->shader_info->is_ngg) {
/* On GFX10, VS is merged into GS for NGG. */
stage = MESA_SHADER_GEOMETRY;
has_previous_stage = true;
@ -178,7 +180,7 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has
ctx->main_function =
create_llvm_function(&ctx->ac, ctx->ac.module, ctx->ac.builder, &ctx->args->ac,
get_llvm_calling_convention(ctx->main_function, stage),
ctx->max_workgroup_size, ctx->args->options);
ctx->max_workgroup_size, ctx->options);
ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST), NULL, 0,
@ -189,7 +191,7 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has
load_descriptor_sets(ctx);
if (stage == MESA_SHADER_TESS_CTRL ||
(stage == MESA_SHADER_VERTEX && ctx->args->shader_info->vs.as_ls) ||
(stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_ls) ||
/* GFX9 has the ESGS ring buffer in LDS. */
(stage == MESA_SHADER_GEOMETRY && has_previous_stage)) {
ac_declare_lds_as_pointer(&ctx->ac);
@ -202,7 +204,7 @@ radv_load_resource(struct ac_shader_abi *abi, LLVMValueRef index, unsigned desc_
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
unsigned base_offset = layout->binding[binding].offset;
LLVMValueRef offset, stride;
@ -261,7 +263,7 @@ load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id)
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ac_array_in_const_addr_space(ctx->ac.v2f32), "");
uint32_t sample_pos_offset = radv_get_sample_pos_offset(ctx->args->options->key.ps.num_samples);
uint32_t sample_pos_offset = radv_get_sample_pos_offset(ctx->options->key.ps.num_samples);
sample_id = LLVMBuildAdd(ctx->ac.builder, sample_id,
LLVMConstInt(ctx->ac.i32, sample_pos_offset, false), "");
@ -276,10 +278,10 @@ load_sample_mask_in(struct ac_shader_abi *abi)
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
uint8_t log2_ps_iter_samples;
if (ctx->args->shader_info->ps.uses_sample_shading) {
log2_ps_iter_samples = util_logbase2(ctx->args->options->key.ps.num_samples);
if (ctx->shader_info->ps.uses_sample_shading) {
log2_ps_iter_samples = util_logbase2(ctx->options->key.ps.num_samples);
} else {
log2_ps_iter_samples = ctx->args->options->key.ps.log2_ps_iter_samples;
log2_ps_iter_samples = ctx->options->key.ps.log2_ps_iter_samples;
}
LLVMValueRef result, sample_id;
@ -306,14 +308,14 @@ visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMV
unsigned offset = 0;
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
if (ctx->args->shader_info->is_ngg) {
if (ctx->shader_info->is_ngg) {
gfx10_ngg_gs_emit_vertex(ctx, stream, vertexidx, addrs);
return;
}
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i];
uint8_t output_stream = ctx->shader_info->gs.output_streams[i];
LLVMValueRef *out_ptr = &addrs[i * 4];
int length = util_last_bit(output_usage_mask);
@ -351,7 +353,7 @@ visit_end_primitive(struct ac_shader_abi *abi, unsigned stream)
{
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
if (ctx->args->shader_info->is_ngg) {
if (ctx->shader_info->is_ngg) {
LLVMBuildStore(ctx->ac.builder, ctx->ac.i32_0, ctx->gs_curprim_verts[stream]);
return;
}
@ -406,7 +408,7 @@ get_desc_ptr(struct radv_shader_context *ctx, LLVMValueRef ptr, bool non_uniform
if (non_uniform) {
/* 32-bit seems to always use SMEM. addrspacecast from 32-bit -> 64-bit is broken. */
LLVMValueRef dwords[] = {ptr,
LLVMConstInt(ctx->ac.i32, ctx->args->options->address32_hi, false)};
LLVMConstInt(ctx->ac.i32, ctx->options->address32_hi, false)};
ptr = ac_build_gather_values(&ctx->ac, dwords, 2);
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
addr_space = AC_ADDR_SPACE_CONST;
@ -439,7 +441,7 @@ radv_load_ubo(struct ac_shader_abi *abi, unsigned desc_set, unsigned binding, bo
LLVMValueRef result;
if (valid_binding) {
struct radv_pipeline_layout *pipeline_layout = ctx->args->options->layout;
struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
@ -461,7 +463,7 @@ radv_load_ubo(struct ac_shader_abi *abi, unsigned desc_set, unsigned binding, bo
LLVMValueRef desc_components[4] = {
LLVMBuildPtrToInt(ctx->ac.builder, buffer_ptr, ctx->ac.intptr, ""),
LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->args->options->address32_hi),
LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->options->address32_hi),
false),
LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
LLVMConstInt(ctx->ac.i32, desc_type, false),
@ -489,7 +491,7 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign
struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
struct radv_descriptor_set_layout *layout =
ctx->args->options->layout->set[descriptor_set].layout;
ctx->options->layout->set[descriptor_set].layout;
struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
unsigned offset = binding->offset;
unsigned stride = binding->size;
@ -584,7 +586,7 @@ radv_get_sampler_desc(struct ac_shader_abi *abi, unsigned descriptor_set, unsign
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
descriptor = ac_build_gather_values(&ctx->ac, components, 8);
} else if (desc_type == AC_DESC_IMAGE &&
ctx->args->options->has_image_load_dcc_bug &&
ctx->options->has_image_load_dcc_bug &&
image && !write) {
LLVMValueRef components[8];
@ -684,17 +686,17 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
LLVMValueRef input;
LLVMValueRef buffer_index;
unsigned attrib_index = driver_location - VERT_ATTRIB_GENERIC0;
unsigned attrib_format = ctx->args->options->key.vs.vertex_attribute_formats[attrib_index];
unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index];
unsigned data_format = attrib_format & 0x0f;
unsigned num_format = (attrib_format >> 4) & 0x07;
bool is_float =
num_format != V_008F0C_BUF_NUM_FORMAT_UINT && num_format != V_008F0C_BUF_NUM_FORMAT_SINT;
uint8_t input_usage_mask =
ctx->args->shader_info->vs.input_usage_mask[driver_location];
ctx->shader_info->vs.input_usage_mask[driver_location];
unsigned num_input_channels = util_last_bit(input_usage_mask);
if (ctx->args->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
uint32_t divisor = ctx->args->options->key.vs.instance_rate_divisors[attrib_index];
if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];
if (divisor) {
buffer_index = ctx->abi.instance_id;
@ -718,19 +720,19 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
/* Adjust the number of channels to load based on the vertex attribute format. */
unsigned num_channels = MIN2(num_input_channels, vtx_info->num_channels);
unsigned attrib_binding = ctx->args->options->key.vs.vertex_attribute_bindings[attrib_index];
unsigned attrib_offset = ctx->args->options->key.vs.vertex_attribute_offsets[attrib_index];
unsigned attrib_stride = ctx->args->options->key.vs.vertex_attribute_strides[attrib_index];
unsigned alpha_adjust = ctx->args->options->key.vs.vertex_alpha_adjust[attrib_index];
unsigned attrib_binding = ctx->options->key.vs.vertex_attribute_bindings[attrib_index];
unsigned attrib_offset = ctx->options->key.vs.vertex_attribute_offsets[attrib_index];
unsigned attrib_stride = ctx->options->key.vs.vertex_attribute_strides[attrib_index];
unsigned alpha_adjust = ctx->options->key.vs.vertex_alpha_adjust[attrib_index];
if (ctx->args->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) {
if (ctx->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) {
/* Always load, at least, 3 channels for formats that need to be shuffled because X<->Z. */
num_channels = MAX2(num_channels, 3);
}
unsigned desc_index =
ctx->args->shader_info->vs.use_per_attribute_vb_descs ? attrib_index : attrib_binding;
desc_index = util_bitcount(ctx->args->shader_info->vs.vb_desc_usage_mask &
ctx->shader_info->vs.use_per_attribute_vb_descs ? attrib_index : attrib_binding;
desc_index = util_bitcount(ctx->shader_info->vs.vb_desc_usage_mask &
u_bit_consecutive(0, desc_index));
t_offset = LLVMConstInt(ctx->ac.i32, desc_index, false);
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
@ -780,7 +782,7 @@ load_vs_input(struct radv_shader_context *ctx, unsigned driver_location, LLVMTyp
ctx->ac.i32_0, ctx->ac.i32_0, num_channels, data_format, num_format, 0, true);
}
if (ctx->args->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) {
if (ctx->options->key.vs.vertex_post_shuffle & (1 << attrib_index)) {
LLVMValueRef c[4];
c[0] = ac_llvm_extract_elem(&ctx->ac, input, 2);
c[1] = ac_llvm_extract_elem(&ctx->ac, input, 1);
@ -904,9 +906,9 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
bool is_16bit = ac_get_type_size(LLVMTypeOf(values[0])) == 2;
if (ctx->stage == MESA_SHADER_FRAGMENT) {
unsigned index = target - V_008DFC_SQ_EXP_MRT;
unsigned col_format = (ctx->args->options->key.ps.col_format >> (4 * index)) & 0xf;
bool is_int8 = (ctx->args->options->key.ps.is_int8 >> index) & 1;
bool is_int10 = (ctx->args->options->key.ps.is_int10 >> index) & 1;
unsigned col_format = (ctx->options->key.ps.col_format >> (4 * index)) & 0xf;
bool is_int8 = (ctx->options->key.ps.is_int8 >> index) & 1;
bool is_int10 = (ctx->options->key.ps.is_int10 >> index) & 1;
LLVMValueRef (*packf)(struct ac_llvm_context * ctx, LLVMValueRef args[2]) = NULL;
LLVMValueRef (*packi)(struct ac_llvm_context * ctx, LLVMValueRef args[2], unsigned bits,
@ -989,7 +991,7 @@ si_llvm_init_export_args(struct radv_shader_context *ctx, LLVMValueRef *values,
/* Replace NaN by zero (only 32-bit) to fix game bugs if
* requested.
*/
if (ctx->args->options->enable_mrt_output_nan_fixup && !is_16bit &&
if (ctx->options->enable_mrt_output_nan_fixup && !is_16bit &&
(col_format == V_028714_SPI_SHADER_32_R || col_format == V_028714_SPI_SHADER_32_GR ||
col_format == V_028714_SPI_SHADER_32_AR || col_format == V_028714_SPI_SHADER_32_ABGR ||
col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
@ -1145,7 +1147,7 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream)
LLVMValueRef buf_ptr = ac_get_arg(&ctx->ac, ctx->args->streamout_buffers);
for (i = 0; i < 4; i++) {
uint16_t stride = ctx->args->shader_info->so.strides[i];
uint16_t stride = ctx->shader_info->so.strides[i];
if (!stride)
continue;
@ -1164,9 +1166,9 @@ radv_emit_streamout(struct radv_shader_context *ctx, unsigned stream)
}
/* Write streamout data. */
for (i = 0; i < ctx->args->shader_info->so.num_outputs; i++) {
for (i = 0; i < ctx->shader_info->so.num_outputs; i++) {
struct radv_shader_output_values shader_out = {0};
struct radv_stream_output *output = &ctx->args->shader_info->so.outputs[i];
struct radv_stream_output *output = &ctx->shader_info->so.outputs[i];
if (stream != output->stream)
continue;
@ -1260,7 +1262,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
}
bool writes_primitive_shading_rate = outinfo->writes_primitive_shading_rate ||
ctx->args->options->force_vrs_rates;
ctx->options->force_vrs_rates;
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_layer ||
outinfo->writes_viewport_index || writes_primitive_shading_rate) {
@ -1281,7 +1283,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
if (outinfo->writes_layer == true)
pos_args[1].out[2] = layer_value;
if (outinfo->writes_viewport_index == true) {
if (ctx->args->options->chip_class >= GFX9) {
if (ctx->options->chip_class >= GFX9) {
/* GFX9 has the layer in out.z[10:0] and the viewport
* index in out.z[19:16].
*/
@ -1300,7 +1302,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
if (outinfo->writes_primitive_shading_rate) {
pos_args[1].out[1] = primitive_shading_rate;
} else if (ctx->args->options->force_vrs_rates) {
} else if (ctx->options->force_vrs_rates) {
/* Bits [2:3] = VRS rate X
* Bits [4:5] = VRS rate Y
*
@ -1312,7 +1314,7 @@ radv_llvm_export_vs(struct radv_shader_context *ctx, struct radv_shader_output_v
*
* Sample shading can't go above 8 samples, so both numbers can't be -2 at the same time.
*/
LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, ctx->args->options->force_vrs_rates, false);
LLVMValueRef rates = LLVMConstInt(ctx->ac.i32, ctx->options->force_vrs_rates, false);
LLVMValueRef cond;
LLVMValueRef v;
@ -1356,7 +1358,7 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, bool export_prim_id, boo
struct radv_shader_output_values *outputs;
unsigned noutput = 0;
if (ctx->args->options->key.has_multiview_view_index) {
if (ctx->options->key.has_multiview_view_index) {
LLVMValueRef *tmp_out = &ctx->abi.outputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
if (!*tmp_out) {
for (unsigned i = 0; i < 4; ++i)
@ -1369,7 +1371,7 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, bool export_prim_id, boo
ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
}
if (ctx->args->shader_info->so.num_outputs && !ctx->args->is_gs_copy_shader) {
if (ctx->shader_info->so.num_outputs && !ctx->args->is_gs_copy_shader) {
/* The GS copy shader emission already emits streamout. */
radv_emit_streamout(ctx, 0);
}
@ -1386,12 +1388,12 @@ handle_vs_outputs_post(struct radv_shader_context *ctx, bool export_prim_id, boo
outputs[noutput].slot_index = i == VARYING_SLOT_CLIP_DIST1;
if (ctx->stage == MESA_SHADER_VERTEX && !ctx->args->is_gs_copy_shader) {
outputs[noutput].usage_mask = ctx->args->shader_info->vs.output_usage_mask[i];
outputs[noutput].usage_mask = ctx->shader_info->vs.output_usage_mask[i];
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
outputs[noutput].usage_mask = ctx->args->shader_info->tes.output_usage_mask[i];
outputs[noutput].usage_mask = ctx->shader_info->tes.output_usage_mask[i];
} else {
assert(ctx->args->is_gs_copy_shader);
outputs[noutput].usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
outputs[noutput].usage_mask = ctx->shader_info->gs.output_usage_mask[i];
}
for (unsigned j = 0; j < 4; j++) {
@ -1463,7 +1465,7 @@ ngg_gs_get_vertex_storage(struct radv_shader_context *ctx)
{
unsigned num_outputs = util_bitcount64(ctx->output_mask);
if (ctx->args->options->key.has_multiview_view_index)
if (ctx->options->key.has_multiview_view_index)
num_outputs++;
LLVMTypeRef elements[2] = {
@ -1601,14 +1603,14 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
/* Copy Primitive IDs from GS threads to the LDS address corresponding
* to the ES thread of the provoking vertex.
*/
if (ctx->stage == MESA_SHADER_VERTEX && ctx->args->shader_info->vs.outinfo.export_prim_id) {
if (ctx->stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.outinfo.export_prim_id) {
ac_build_ifcc(&ctx->ac, is_gs_thread, 5400);
LLVMValueRef provoking_vtx_in_prim = LLVMConstInt(ctx->ac.i32, 0, false);
/* For provoking vertex last mode, use num_vtx_in_prim - 1. */
if (ctx->args->options->key.vs.provoking_vtx_last) {
uint8_t outprim = si_conv_prim_to_gs_out(ctx->args->options->key.vs.topology);
if (ctx->options->key.vs.provoking_vtx_last) {
uint8_t outprim = si_conv_prim_to_gs_out(ctx->options->key.vs.topology);
provoking_vtx_in_prim = LLVMConstInt(ctx->ac.i32, outprim, false);
}
@ -1641,7 +1643,7 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
{
struct ac_ngg_prim prim = {0};
if (ctx->args->shader_info->is_ngg_passthrough) {
if (ctx->shader_info->is_ngg_passthrough) {
prim.passthrough = ac_get_arg(&ctx->ac, ctx->args->ac.gs_vtx_offset[0]);
} else {
prim.num_vertices = num_vertices;
@ -1658,8 +1660,8 @@ handle_ngg_outputs_post_2(struct radv_shader_context *ctx)
ac_build_ifcc(&ctx->ac, is_es_thread, 6002);
{
struct radv_vs_output_info *outinfo = ctx->stage == MESA_SHADER_TESS_EVAL
? &ctx->args->shader_info->tes.outinfo
: &ctx->args->shader_info->vs.outinfo;
? &ctx->shader_info->tes.outinfo
: &ctx->shader_info->vs.outinfo;
/* Exporting the primitive ID is handled below. */
/* TODO: use the new VS export path */
@ -1736,7 +1738,7 @@ gfx10_ngg_gs_emit_epilogue_1(struct radv_shader_context *ctx)
for (unsigned stream = 0; stream < 4; ++stream) {
unsigned num_components;
num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
num_components = ctx->shader_info->gs.num_stream_output_components[stream];
if (!num_components)
continue;
@ -1764,7 +1766,7 @@ gfx10_ngg_gs_emit_epilogue_1(struct radv_shader_context *ctx)
for (unsigned stream = 0; stream < 4; ++stream) {
unsigned num_components;
num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
num_components = ctx->shader_info->gs.num_stream_output_components[stream];
if (!num_components)
continue;
@ -1922,7 +1924,7 @@ gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
is_odd = LLVMBuildTrunc(builder, is_odd, ctx->ac.i1, "");
LLVMValueRef flatshade_first =
LLVMConstInt(ctx->ac.i1, !ctx->args->options->key.vs.provoking_vtx_last, false);
LLVMConstInt(ctx->ac.i1, !ctx->options->key.vs.provoking_vtx_last, false);
ac_build_triangle_strip_indices_to_triangle(&ctx->ac, is_odd, flatshade_first, prim.index);
}
@ -1935,8 +1937,8 @@ gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
tmp = LLVMBuildICmp(builder, LLVMIntULT, tid, vertlive_scan.result_reduce, "");
ac_build_ifcc(&ctx->ac, tmp, 5145);
{
struct radv_vs_output_info *outinfo = &ctx->args->shader_info->vs.outinfo;
bool export_view_index = ctx->args->options->key.has_multiview_view_index;
struct radv_vs_output_info *outinfo = &ctx->shader_info->vs.outinfo;
bool export_view_index = ctx->options->key.has_multiview_view_index;
struct radv_shader_output_values *outputs;
unsigned noutput = 0;
@ -1951,7 +1953,7 @@ gfx10_ngg_gs_emit_epilogue_2(struct radv_shader_context *ctx)
unsigned out_idx = 0;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i];
int length = util_last_bit(output_usage_mask);
if (!(ctx->output_mask & (1ull << i)))
@ -2011,8 +2013,8 @@ gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, LLVMV
const LLVMValueRef vertexptr = ngg_gs_emit_vertex_ptr(ctx, get_thread_id_in_tg(ctx), vertexidx);
unsigned out_idx = 0;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
uint8_t output_stream = ctx->args->shader_info->gs.output_streams[i];
unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i];
uint8_t output_stream = ctx->shader_info->gs.output_streams[i];
LLVMValueRef *out_ptr = &addrs[i * 4];
int length = util_last_bit(output_usage_mask);
@ -2030,7 +2032,7 @@ gfx10_ngg_gs_emit_vertex(struct radv_shader_context *ctx, unsigned stream, LLVMV
LLVMBuildStore(builder, out_val, ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx));
}
}
assert(out_idx * 4 <= ctx->args->shader_info->gs.gsvs_vertex_size);
assert(out_idx * 4 <= ctx->shader_info->gs.gsvs_vertex_size);
/* Store the current number of emitted vertices to zero out remaining
* primitive flags in case the geometry shader doesn't emit the maximum
@ -2122,22 +2124,22 @@ handle_fs_outputs_post(struct radv_shader_context *ctx)
}
/* Process depth, stencil, samplemask. */
if (ctx->args->shader_info->ps.writes_z) {
if (ctx->shader_info->ps.writes_z) {
depth = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_DEPTH, 0));
}
if (ctx->args->shader_info->ps.writes_stencil) {
if (ctx->shader_info->ps.writes_stencil) {
stencil = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_STENCIL, 0));
}
if (ctx->args->shader_info->ps.writes_sample_mask) {
if (ctx->shader_info->ps.writes_sample_mask) {
samplemask = ac_to_float(&ctx->ac, radv_load_output(ctx, FRAG_RESULT_SAMPLE_MASK, 0));
}
/* Set the DONE bit on last non-null color export only if Z isn't
* exported.
*/
if (index > 0 && !ctx->args->shader_info->ps.writes_z &&
!ctx->args->shader_info->ps.writes_stencil &&
!ctx->args->shader_info->ps.writes_sample_mask) {
if (index > 0 && !ctx->shader_info->ps.writes_z &&
!ctx->shader_info->ps.writes_stencil &&
!ctx->shader_info->ps.writes_sample_mask) {
unsigned last = index - 1;
color_args[last].valid_mask = 1; /* whether the EXEC mask is valid */
@ -2157,7 +2159,7 @@ handle_fs_outputs_post(struct radv_shader_context *ctx)
static void
emit_gs_epilogue(struct radv_shader_context *ctx)
{
if (ctx->args->shader_info->is_ngg) {
if (ctx->shader_info->is_ngg) {
gfx10_ngg_gs_emit_epilogue_1(ctx);
return;
}
@ -2175,16 +2177,16 @@ handle_shader_outputs_post(struct ac_shader_abi *abi)
switch (ctx->stage) {
case MESA_SHADER_VERTEX:
if (ctx->args->shader_info->vs.as_ls)
if (ctx->shader_info->vs.as_ls)
break; /* Lowered in NIR */
else if (ctx->args->shader_info->vs.as_es)
else if (ctx->shader_info->vs.as_es)
break; /* Lowered in NIR */
else if (ctx->args->shader_info->is_ngg)
else if (ctx->shader_info->is_ngg)
break;
else
handle_vs_outputs_post(ctx, ctx->args->shader_info->vs.outinfo.export_prim_id,
ctx->args->shader_info->vs.outinfo.export_clip_dists,
&ctx->args->shader_info->vs.outinfo);
handle_vs_outputs_post(ctx, ctx->shader_info->vs.outinfo.export_prim_id,
ctx->shader_info->vs.outinfo.export_clip_dists,
&ctx->shader_info->vs.outinfo);
break;
case MESA_SHADER_FRAGMENT:
handle_fs_outputs_post(ctx);
@ -2195,14 +2197,14 @@ handle_shader_outputs_post(struct ac_shader_abi *abi)
case MESA_SHADER_TESS_CTRL:
break; /* Lowered in NIR */
case MESA_SHADER_TESS_EVAL:
if (ctx->args->shader_info->tes.as_es)
if (ctx->shader_info->tes.as_es)
break; /* Lowered in NIR */
else if (ctx->args->shader_info->is_ngg)
else if (ctx->shader_info->is_ngg)
break;
else
handle_vs_outputs_post(ctx, ctx->args->shader_info->tes.outinfo.export_prim_id,
ctx->args->shader_info->tes.outinfo.export_clip_dists,
&ctx->args->shader_info->tes.outinfo);
handle_vs_outputs_post(ctx, ctx->shader_info->tes.outinfo.export_prim_id,
ctx->shader_info->tes.outinfo.export_clip_dists,
&ctx->shader_info->tes.outinfo);
break;
default:
break;
@ -2210,8 +2212,7 @@ handle_shader_outputs_post(struct ac_shader_abi *abi)
}
static void
ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef passmgr,
const struct radv_nir_compiler_options *options)
ac_llvm_finalize_module(struct radv_shader_context *ctx, LLVMPassManagerRef passmgr)
{
LLVMRunPassManager(passmgr, ctx->ac.module);
LLVMDisposeBuilder(ctx->ac.builder);
@ -2231,15 +2232,15 @@ ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx)
case MESA_SHADER_GEOMETRY:
return;
case MESA_SHADER_VERTEX:
if (ctx->args->shader_info->vs.as_ls ||
ctx->args->shader_info->vs.as_es)
if (ctx->shader_info->vs.as_ls ||
ctx->shader_info->vs.as_es)
return;
outinfo = &ctx->args->shader_info->vs.outinfo;
outinfo = &ctx->shader_info->vs.outinfo;
break;
case MESA_SHADER_TESS_EVAL:
if (ctx->args->shader_info->tes.as_es)
if (ctx->shader_info->tes.as_es)
return;
outinfo = &ctx->args->shader_info->tes.outinfo;
outinfo = &ctx->shader_info->tes.outinfo;
break;
default:
unreachable("Unhandled shader type");
@ -2252,10 +2253,10 @@ ac_nir_eliminate_const_vs_outputs(struct radv_shader_context *ctx)
static void
ac_setup_rings(struct radv_shader_context *ctx)
{
if (ctx->args->options->chip_class <= GFX8 &&
if (ctx->options->chip_class <= GFX8 &&
(ctx->stage == MESA_SHADER_GEOMETRY ||
(ctx->stage == MESA_SHADER_VERTEX && ctx->args->shader_info->vs.as_es) ||
(ctx->stage == MESA_SHADER_TESS_EVAL && ctx->args->shader_info->tes.as_es))) {
(ctx->stage == MESA_SHADER_VERTEX && ctx->shader_info->vs.as_es) ||
(ctx->stage == MESA_SHADER_TESS_EVAL && ctx->shader_info->tes.as_es))) {
unsigned ring = ctx->stage == MESA_SHADER_GEOMETRY ? RING_ESGS_GS : RING_ESGS_VS;
LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, ring, false);
@ -2288,7 +2289,7 @@ ac_setup_rings(struct radv_shader_context *ctx)
unsigned num_components, stride;
LLVMValueRef ring, tmp;
num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
num_components = ctx->shader_info->gs.num_stream_output_components[stream];
if (!num_components)
continue;
@ -2384,11 +2385,16 @@ declare_esgs_ring(struct radv_shader_context *ctx)
}
static LLVMModuleRef
ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *const *shaders,
int shader_count, const struct radv_shader_args *args)
ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
const struct radv_nir_compiler_options *options,
struct radv_shader_info *info,
struct nir_shader *const *shaders, int shader_count,
const struct radv_shader_args *args)
{
struct radv_shader_context ctx = {0};
ctx.args = args;
ctx.options = options;
ctx.shader_info = info;
enum ac_float_mode float_mode = AC_FLOAT_MODE_DEFAULT;
@ -2396,15 +2402,14 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co
float_mode = AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO;
}
ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family,
args->options->info, float_mode, args->shader_info->wave_size,
args->shader_info->ballot_bit_size);
ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class, options->family,
options->info, float_mode, info->wave_size, info->ballot_bit_size);
ctx.context = ctx.ac.context;
ctx.max_workgroup_size = args->shader_info->workgroup_size;
ctx.max_workgroup_size = info->workgroup_size;
if (ctx.ac.chip_class >= GFX10) {
if (is_pre_gs_stage(shaders[0]->info.stage) && args->shader_info->is_ngg) {
if (is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg) {
ctx.max_workgroup_size = 128;
}
}
@ -2421,10 +2426,10 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co
ctx.abi.load_ring_tess_offchip = load_ring_tess_offchip;
ctx.abi.load_ring_esgs = load_ring_esgs;
ctx.abi.clamp_shadow_reference = false;
ctx.abi.adjust_frag_coord_z = args->options->adjust_frag_coord_z;
ctx.abi.robust_buffer_access = args->options->robust_buffer_access;
ctx.abi.adjust_frag_coord_z = options->adjust_frag_coord_z;
ctx.abi.robust_buffer_access = options->robust_buffer_access;
bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && args->shader_info->is_ngg;
bool is_ngg = is_pre_gs_stage(shaders[0]->info.stage) && info->is_ngg;
if (shader_count >= 2 || is_ngg)
ac_init_exec_full_mask(&ctx.ac);
@ -2435,7 +2440,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co
if (args->ac.instance_id.used)
ctx.abi.instance_id = ac_get_arg(&ctx.ac, args->ac.instance_id);
if (args->options->has_ls_vgpr_init_bug &&
if (options->has_ls_vgpr_init_bug &&
shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
ac_nir_fixup_ls_hs_input_vgprs(&ctx);
@ -2447,7 +2452,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co
* Add an extra dword per vertex to ensure an odd stride, which
* avoids bank conflicts for SoA accesses.
*/
if (!args->shader_info->is_ngg_passthrough)
if (!info->is_ngg_passthrough)
declare_esgs_ring(&ctx);
/* GFX10 hang workaround - there needs to be an s_barrier before gs_alloc_req always */
@ -2464,7 +2469,7 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co
for (int i = 0; i < 4; i++) {
ctx.gs_next_vertex[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
}
if (args->shader_info->is_ngg) {
if (info->is_ngg) {
for (unsigned i = 0; i < 4; ++i) {
ctx.gs_curprim_verts[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
ctx.gs_generated_prims[i] = ac_build_alloca(&ctx.ac, ctx.ac.i32, "");
@ -2492,17 +2497,15 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co
ctx.abi.load_sample_mask_in = load_sample_mask_in;
}
if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX &&
args->shader_info->is_ngg &&
args->shader_info->vs.outinfo.export_prim_id) {
if (shaders[shader_idx]->info.stage == MESA_SHADER_VERTEX && info->is_ngg &&
info->vs.outinfo.export_prim_id) {
declare_esgs_ring(&ctx);
}
bool nested_barrier = false;
if (shader_idx) {
if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
args->shader_info->is_ngg) {
if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg) {
gfx10_ngg_gs_emit_prologue(&ctx);
nested_barrier = false;
} else {
@ -2565,25 +2568,24 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm, struct nir_shader *co
/* This needs to be outside the if wrapping the shader body, as sometimes
* the HW generates waves with 0 es/vs threads. */
if (is_pre_gs_stage(shaders[shader_idx]->info.stage) &&
args->shader_info->is_ngg && shader_idx == shader_count - 1) {
if (is_pre_gs_stage(shaders[shader_idx]->info.stage) && info->is_ngg &&
shader_idx == shader_count - 1) {
handle_ngg_outputs_post_2(&ctx);
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY &&
args->shader_info->is_ngg) {
} else if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg) {
gfx10_ngg_gs_emit_epilogue_2(&ctx);
}
}
LLVMBuildRetVoid(ctx.ac.builder);
if (args->options->dump_preoptir) {
if (options->dump_preoptir) {
fprintf(stderr, "%s LLVM IR:\n\n",
radv_get_shader_name(args->shader_info, shaders[shader_count - 1]->info.stage));
radv_get_shader_name(info, shaders[shader_count - 1]->info.stage));
ac_dump_module(ctx.ac.module);
fprintf(stderr, "\n");
}
ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
ac_llvm_finalize_module(&ctx, ac_llvm->passmgr);
if (shader_count == 1)
ac_nir_eliminate_const_vs_outputs(&ctx);
@ -2673,18 +2675,21 @@ ac_compile_llvm_module(struct ac_llvm_compiler *ac_llvm, LLVMModuleRef llvm_modu
}
static void
radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm, struct radv_shader_binary **rbinary,
radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
const struct radv_nir_compiler_options *options,
struct radv_shader_info *info,
struct radv_shader_binary **rbinary,
const struct radv_shader_args *args, struct nir_shader *const *nir,
int nir_count)
{
LLVMModuleRef llvm_module;
llvm_module = ac_translate_nir_to_llvm(ac_llvm, nir, nir_count, args);
llvm_module = ac_translate_nir_to_llvm(ac_llvm, options, info, nir, nir_count, args);
ac_compile_llvm_module(ac_llvm, llvm_module, rbinary, nir[nir_count - 1]->info.stage,
radv_get_shader_name(args->shader_info, nir[nir_count - 1]->info.stage),
args->options);
radv_get_shader_name(info, nir[nir_count - 1]->info.stage),
options);
}
static void
@ -2696,7 +2701,7 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
LLVMValueRef stream_id;
/* Fetch the vertex stream ID. */
if (ctx->args->shader_info->so.num_outputs) {
if (ctx->shader_info->so.num_outputs) {
stream_id =
ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ac.streamout_config), 24, 2);
} else {
@ -2710,14 +2715,14 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
switch_inst = LLVMBuildSwitch(ctx->ac.builder, stream_id, end_bb, 4);
for (unsigned stream = 0; stream < 4; stream++) {
unsigned num_components = ctx->args->shader_info->gs.num_stream_output_components[stream];
unsigned num_components = ctx->shader_info->gs.num_stream_output_components[stream];
LLVMBasicBlockRef bb;
unsigned offset;
if (stream > 0 && !num_components)
continue;
if (stream > 0 && !ctx->args->shader_info->so.num_outputs)
if (stream > 0 && !ctx->shader_info->so.num_outputs)
continue;
bb = LLVMInsertBasicBlockInContext(ctx->ac.context, end_bb, "out");
@ -2726,8 +2731,8 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
offset = 0;
for (unsigned i = 0; i < AC_LLVM_MAX_OUTPUTS; ++i) {
unsigned output_usage_mask = ctx->args->shader_info->gs.output_usage_mask[i];
unsigned output_stream = ctx->args->shader_info->gs.output_streams[i];
unsigned output_usage_mask = ctx->shader_info->gs.output_usage_mask[i];
unsigned output_stream = ctx->shader_info->gs.output_streams[i];
int length = util_last_bit(output_usage_mask);
if (!(ctx->output_mask & (1ull << i)) || output_stream != stream)
@ -2758,12 +2763,12 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
}
}
if (ctx->args->shader_info->so.num_outputs)
if (ctx->shader_info->so.num_outputs)
radv_emit_streamout(ctx, stream);
if (stream == 0) {
handle_vs_outputs_post(ctx, false, ctx->args->shader_info->vs.outinfo.export_clip_dists,
&ctx->args->shader_info->vs.outinfo);
handle_vs_outputs_post(ctx, false, ctx->shader_info->vs.outinfo.export_clip_dists,
&ctx->shader_info->vs.outinfo);
}
LLVMBuildBr(ctx->ac.builder, end_bb);
@ -2773,17 +2778,22 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
}
static void
radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader *geom_shader,
radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm,
const struct radv_nir_compiler_options *options,
struct radv_shader_info *info,
struct nir_shader *geom_shader,
struct radv_shader_binary **rbinary,
const struct radv_shader_args *args)
{
struct radv_shader_context ctx = {0};
ctx.args = args;
ctx.options = options;
ctx.shader_info = info;
assert(args->is_gs_copy_shader);
ac_llvm_context_init(&ctx.ac, ac_llvm, args->options->chip_class, args->options->family,
args->options->info, AC_FLOAT_MODE_DEFAULT, 64, 64);
ac_llvm_context_init(&ctx.ac, ac_llvm, options->chip_class, options->family,
options->info, AC_FLOAT_MODE_DEFAULT, 64, 64);
ctx.context = ctx.ac.context;
ctx.stage = MESA_SHADER_VERTEX;
@ -2803,31 +2813,31 @@ radv_compile_gs_copy_shader(struct ac_llvm_compiler *ac_llvm, struct nir_shader
LLVMBuildRetVoid(ctx.ac.builder);
ac_llvm_finalize_module(&ctx, ac_llvm->passmgr, args->options);
ac_llvm_finalize_module(&ctx, ac_llvm->passmgr);
ac_compile_llvm_module(ac_llvm, ctx.ac.module, rbinary, MESA_SHADER_VERTEX, "GS Copy Shader",
args->options);
options);
(*rbinary)->is_gs_copy_shader = true;
}
void
llvm_compile_shader(struct radv_device *device, unsigned shader_count,
llvm_compile_shader(const struct radv_nir_compiler_options *options,
struct radv_shader_info *info, unsigned shader_count,
struct nir_shader *const *shaders, struct radv_shader_binary **binary,
struct radv_shader_args *args)
const struct radv_shader_args *args)
{
enum ac_target_machine_options tm_options = 0;
struct ac_llvm_compiler ac_llvm;
tm_options |= AC_TM_SUPPORTS_SPILL;
if (args->options->check_ir)
if (options->check_ir)
tm_options |= AC_TM_CHECK_IR;
radv_init_llvm_compiler(&ac_llvm, args->options->family, tm_options,
args->shader_info->wave_size);
radv_init_llvm_compiler(&ac_llvm, options->family, tm_options, info->wave_size);
if (args->is_gs_copy_shader) {
radv_compile_gs_copy_shader(&ac_llvm, *shaders, binary, args);
radv_compile_gs_copy_shader(&ac_llvm, options, info, *shaders, binary, args);
} else {
radv_compile_nir_shader(&ac_llvm, binary, args, shaders, shader_count);
radv_compile_nir_shader(&ac_llvm, options, info, binary, args, shaders, shader_count);
}
}

View file

@ -2621,10 +2621,13 @@ struct radv_fence {
/* radv_nir_to_llvm.c */
struct radv_shader_args;
struct radv_nir_compiler_options;
struct radv_shader_info;
void llvm_compile_shader(struct radv_device *device, unsigned shader_count,
void llvm_compile_shader(const struct radv_nir_compiler_options *options,
struct radv_shader_info *info, unsigned shader_count,
struct nir_shader *const *shaders, struct radv_shader_binary **binary,
struct radv_shader_args *args);
const struct radv_shader_args *args);
/* radv_shader_info.h */
struct radv_shader_info;

View file

@ -1782,27 +1782,25 @@ shader_variant_compile(struct radv_device *device, struct vk_shader_module *modu
}
struct radv_shader_args args = {0};
args.options = options;
args.shader_info = info;
args.is_gs_copy_shader = gs_copy_shader;
args.is_trap_handler_shader = trap_handler_shader;
radv_declare_shader_args(
&args, gs_copy_shader ? MESA_SHADER_VERTEX : shaders[shader_count - 1]->info.stage,
radv_declare_shader_args(options, info,
gs_copy_shader ? MESA_SHADER_VERTEX : shaders[shader_count - 1]->info.stage,
shader_count >= 2,
shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX, &args);
#ifdef LLVM_AVAILABLE
if (radv_use_llvm_for_stage(device, stage) || options->dump_shader || options->record_ir)
ac_init_llvm_once();
if (radv_use_llvm_for_stage(device, stage)) {
llvm_compile_shader(device, shader_count, shaders, &binary, &args);
llvm_compile_shader(options, info, shader_count, shaders, &binary, &args);
#else
if (false) {
#endif
} else {
aco_compile_shader(shader_count, shaders, &binary, &args);
aco_compile_shader(options, info, shader_count, shaders, &args, &binary);
}
binary->info = *info;
@ -1962,10 +1960,8 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
info.is_ngg = key->is_ngg;
struct radv_shader_args args = {0};
args.options = &options;
args.shader_info = &info;
radv_declare_shader_args(&args, key->next_stage, key->next_stage != MESA_SHADER_VERTEX,
MESA_SHADER_VERTEX);
radv_declare_shader_args(&options, &info, key->next_stage, key->next_stage != MESA_SHADER_VERTEX,
MESA_SHADER_VERTEX, &args);
#ifdef LLVM_AVAILABLE
if (options.dump_shader)
@ -1973,7 +1969,7 @@ radv_create_vs_prolog(struct radv_device *device, const struct radv_vs_prolog_ke
#endif
struct radv_prolog_binary *binary = NULL;
aco_compile_vs_prolog(key, &binary, &args);
aco_compile_vs_prolog(&options, &info, key, &args, &binary);
struct radv_shader_prolog *prolog = upload_vs_prolog(device, binary, info.wave_size);
free(binary);

View file

@ -39,26 +39,26 @@ set_loc(struct radv_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs
}
static void
set_loc_shader(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
set_loc_shader(struct radv_shader_info *info, int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
struct radv_userdata_info *ud_info = &args->shader_info->user_sgprs_locs.shader_data[idx];
struct radv_userdata_info *ud_info = &info->user_sgprs_locs.shader_data[idx];
assert(ud_info);
set_loc(ud_info, sgpr_idx, num_sgprs);
}
static void
set_loc_shader_ptr(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
set_loc_shader_ptr(struct radv_shader_info *info, int idx, uint8_t *sgpr_idx)
{
bool use_32bit_pointers = idx != AC_UD_SCRATCH_RING_OFFSETS;
set_loc_shader(args, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
set_loc_shader(info, idx, sgpr_idx, use_32bit_pointers ? 1 : 2);
}
static void
set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
set_loc_desc(struct radv_shader_info *info, int idx, uint8_t *sgpr_idx)
{
struct radv_userdata_locations *locs = &args->shader_info->user_sgprs_locs;
struct radv_userdata_locations *locs = &info->user_sgprs_locs;
struct radv_userdata_info *ud_info = &locs->descriptor_sets[idx];
assert(ud_info);
@ -75,27 +75,28 @@ struct user_sgpr_info {
};
static bool
needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage)
needs_view_index_sgpr(const struct radv_nir_compiler_options *options,
const struct radv_shader_info *info, gl_shader_stage stage)
{
switch (stage) {
case MESA_SHADER_VERTEX:
if (args->shader_info->uses_view_index ||
(!args->shader_info->vs.as_es && !args->shader_info->vs.as_ls &&
args->options->key.has_multiview_view_index))
if (info->uses_view_index ||
(!info->vs.as_es && !info->vs.as_ls &&
options->key.has_multiview_view_index))
return true;
break;
case MESA_SHADER_TESS_EVAL:
if (args->shader_info->uses_view_index ||
(!args->shader_info->tes.as_es && args->options->key.has_multiview_view_index))
if (info->uses_view_index ||
(!info->tes.as_es && options->key.has_multiview_view_index))
return true;
break;
case MESA_SHADER_TESS_CTRL:
if (args->shader_info->uses_view_index)
if (info->uses_view_index)
return true;
break;
case MESA_SHADER_GEOMETRY:
if (args->shader_info->uses_view_index ||
(args->shader_info->is_ngg && args->options->key.has_multiview_view_index))
if (info->uses_view_index ||
(info->is_ngg && options->key.has_multiview_view_index))
return true;
break;
default:
@ -105,52 +106,53 @@ needs_view_index_sgpr(struct radv_shader_args *args, gl_shader_stage stage)
}
static uint8_t
count_vs_user_sgprs(struct radv_shader_args *args)
count_vs_user_sgprs(const struct radv_shader_info *info)
{
uint8_t count = 1; /* vertex offset */
if (args->shader_info->vs.vb_desc_usage_mask)
if (info->vs.vb_desc_usage_mask)
count++;
if (args->shader_info->vs.needs_draw_id)
if (info->vs.needs_draw_id)
count++;
if (args->shader_info->vs.needs_base_instance)
if (info->vs.needs_base_instance)
count++;
return count;
}
static unsigned
count_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
count_ngg_sgprs(const struct radv_shader_info *info, bool has_api_gs)
{
unsigned count = 0;
if (has_api_gs)
count += 1; /* ngg_gs_state */
if (args->shader_info->has_ngg_culling)
if (info->has_ngg_culling)
count += 5; /* ngg_culling_settings + 4x ngg_viewport_* */
return count;
}
static void
allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info *user_sgpr_info)
allocate_inline_push_consts(const struct radv_shader_info *info,
struct user_sgpr_info *user_sgpr_info)
{
uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
/* Only supported if shaders use push constants. */
if (args->shader_info->min_push_constant_used == UINT8_MAX)
if (info->min_push_constant_used == UINT8_MAX)
return;
/* Only supported if shaders don't have indirect push constants. */
if (args->shader_info->has_indirect_push_constants)
if (info->has_indirect_push_constants)
return;
/* Only supported for 32-bit push constants. */
if (!args->shader_info->has_only_32bit_push_constants)
if (!info->has_only_32bit_push_constants)
return;
uint8_t num_push_consts =
(args->shader_info->max_push_constant_used - args->shader_info->min_push_constant_used) / 4;
(info->max_push_constant_used - info->min_push_constant_used) / 4;
/* Check if the number of user SGPRs is large enough. */
if (num_push_consts < remaining_sgprs) {
@ -164,7 +166,7 @@ allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info
user_sgpr_info->num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
if (user_sgpr_info->num_inline_push_consts == num_push_consts &&
!args->shader_info->loads_dynamic_offsets) {
!info->loads_dynamic_offsets) {
/* Disable the default push constants path if all constants are
* inlined and if shaders don't use dynamic descriptors.
*/
@ -173,9 +175,10 @@ allocate_inline_push_consts(struct radv_shader_args *args, struct user_sgpr_info
}
static void
allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool has_previous_stage,
gl_shader_stage previous_stage, bool needs_view_index, bool has_api_gs,
struct user_sgpr_info *user_sgpr_info)
allocate_user_sgprs(const struct radv_nir_compiler_options *options,
const struct radv_shader_info *info, gl_shader_stage stage,
bool has_previous_stage, gl_shader_stage previous_stage, bool needs_view_index,
bool has_api_gs, bool is_gs_copy_shader, struct user_sgpr_info *user_sgpr_info)
{
uint8_t user_sgpr_count = 0;
@ -185,39 +188,39 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h
user_sgpr_count += 2;
/* prolog inputs */
if (args->shader_info->vs.has_prolog)
if (info->vs.has_prolog)
user_sgpr_count += 2;
switch (stage) {
case MESA_SHADER_COMPUTE:
if (args->shader_info->cs.uses_sbt)
if (info->cs.uses_sbt)
user_sgpr_count += 1;
if (args->shader_info->cs.uses_grid_size)
if (info->cs.uses_grid_size)
user_sgpr_count += 3;
if (args->shader_info->cs.uses_ray_launch_size)
if (info->cs.uses_ray_launch_size)
user_sgpr_count += 3;
break;
case MESA_SHADER_FRAGMENT:
break;
case MESA_SHADER_VERTEX:
if (!args->is_gs_copy_shader)
user_sgpr_count += count_vs_user_sgprs(args);
if (!is_gs_copy_shader)
user_sgpr_count += count_vs_user_sgprs(info);
break;
case MESA_SHADER_TESS_CTRL:
if (has_previous_stage) {
if (previous_stage == MESA_SHADER_VERTEX)
user_sgpr_count += count_vs_user_sgprs(args);
user_sgpr_count += count_vs_user_sgprs(info);
}
break;
case MESA_SHADER_TESS_EVAL:
break;
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
if (args->shader_info->is_ngg)
user_sgpr_count += count_ngg_sgprs(args, has_api_gs);
if (info->is_ngg)
user_sgpr_count += count_ngg_sgprs(info, has_api_gs);
if (previous_stage == MESA_SHADER_VERTEX) {
user_sgpr_count += count_vs_user_sgprs(args);
user_sgpr_count += count_vs_user_sgprs(info);
}
}
break;
@ -228,16 +231,16 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h
if (needs_view_index)
user_sgpr_count++;
if (args->shader_info->loads_push_constants)
if (info->loads_push_constants)
user_sgpr_count++;
if (args->shader_info->so.num_outputs)
if (info->so.num_outputs)
user_sgpr_count++;
uint32_t available_sgprs =
args->options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
options->chip_class >= GFX9 && stage != MESA_SHADER_COMPUTE ? 32 : 16;
uint32_t remaining_sgprs = available_sgprs - user_sgpr_count;
uint32_t num_desc_set = util_bitcount(args->shader_info->desc_set_used_mask);
uint32_t num_desc_set = util_bitcount(info->desc_set_used_mask);
if (remaining_sgprs < num_desc_set) {
user_sgpr_info->indirect_all_descriptor_sets = true;
@ -246,16 +249,17 @@ allocate_user_sgprs(struct radv_shader_args *args, gl_shader_stage stage, bool h
user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
}
allocate_inline_push_consts(args, user_sgpr_info);
allocate_inline_push_consts(info, user_sgpr_info);
}
static void
declare_global_input_sgprs(struct radv_shader_args *args,
const struct user_sgpr_info *user_sgpr_info)
declare_global_input_sgprs(const struct radv_shader_info *info,
const struct user_sgpr_info *user_sgpr_info,
struct radv_shader_args *args)
{
/* 1 for each descriptor set */
if (!user_sgpr_info->indirect_all_descriptor_sets) {
uint32_t mask = args->shader_info->desc_set_used_mask;
uint32_t mask = info->desc_set_used_mask;
while (mask) {
int i = u_bit_scan(&mask);
@ -266,7 +270,7 @@ declare_global_input_sgprs(struct radv_shader_args *args,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR_PTR, &args->descriptor_sets[0]);
}
if (args->shader_info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
if (info->loads_push_constants && !user_sgpr_info->inlined_all_push_consts) {
/* 1 for push constants and dynamic descriptors */
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_PTR, &args->ac.push_constants);
}
@ -274,43 +278,45 @@ declare_global_input_sgprs(struct radv_shader_args *args,
for (unsigned i = 0; i < user_sgpr_info->num_inline_push_consts; i++) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.inline_push_consts[i]);
}
args->ac.base_inline_push_consts = args->shader_info->min_push_constant_used / 4;
args->ac.base_inline_push_consts = info->min_push_constant_used / 4;
if (args->shader_info->so.num_outputs) {
if (info->so.num_outputs) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->streamout_buffers);
}
}
static void
declare_vs_specific_input_sgprs(struct radv_shader_args *args, gl_shader_stage stage,
bool has_previous_stage, gl_shader_stage previous_stage)
declare_vs_specific_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
gl_shader_stage stage, bool has_previous_stage,
gl_shader_stage previous_stage)
{
if (args->shader_info->vs.has_prolog)
if (info->vs.has_prolog)
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->prolog_inputs);
if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (args->shader_info->vs.vb_desc_usage_mask) {
if (info->vs.vb_desc_usage_mask) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.vertex_buffers);
}
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
if (args->shader_info->vs.needs_draw_id) {
if (info->vs.needs_draw_id) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
}
if (args->shader_info->vs.needs_base_instance) {
if (info->vs.needs_base_instance) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.start_instance);
}
}
}
static void
declare_vs_input_vgprs(struct radv_shader_args *args)
declare_vs_input_vgprs(const struct radv_nir_compiler_options *options,
const struct radv_shader_info *info, struct radv_shader_args *args)
{
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vertex_id);
if (!args->is_gs_copy_shader) {
if (args->shader_info->vs.as_ls) {
if (info->vs.as_ls) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.vs_rel_patch_id);
if (args->options->chip_class >= GFX10) {
if (options->chip_class >= GFX10) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
} else {
@ -318,8 +324,8 @@ declare_vs_input_vgprs(struct radv_shader_args *args)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* unused */
}
} else {
if (args->options->chip_class >= GFX10) {
if (args->shader_info->is_ngg) {
if (options->chip_class >= GFX10) {
if (info->is_ngg) {
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* user vgpr */
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.instance_id);
@ -336,9 +342,9 @@ declare_vs_input_vgprs(struct radv_shader_args *args)
}
}
if (args->shader_info->vs.dynamic_inputs) {
assert(args->shader_info->vs.use_per_attribute_vb_descs);
unsigned num_attributes = util_last_bit(args->shader_info->vs.vb_desc_usage_mask);
if (info->vs.dynamic_inputs) {
assert(info->vs.use_per_attribute_vb_descs);
unsigned num_attributes = util_last_bit(info->vs.vb_desc_usage_mask);
for (unsigned i = 0; i < num_attributes; i++)
ac_add_arg(&args->ac, AC_ARG_VGPR, 4, AC_ARG_INT, &args->vs_inputs[i]);
/* Ensure the main shader doesn't use less vgprs than the prolog. The prolog requires one
@ -349,12 +355,13 @@ declare_vs_input_vgprs(struct radv_shader_args *args)
}
static void
declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
declare_streamout_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
gl_shader_stage stage)
{
int i;
/* Streamout SGPRs. */
if (args->shader_info->so.num_outputs) {
if (info->so.num_outputs) {
assert(stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_config);
@ -365,7 +372,7 @@ declare_streamout_sgprs(struct radv_shader_args *args, gl_shader_stage stage)
/* A streamout buffer offset is loaded if the stride is non-zero. */
for (i = 0; i < 4; i++) {
if (!args->shader_info->so.strides[i])
if (!info->so.strides[i])
continue;
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.streamout_offset[i]);
@ -382,9 +389,10 @@ declare_tes_input_vgprs(struct radv_shader_args *args)
}
static void
declare_ps_input_vgprs(struct radv_shader_args *args)
declare_ps_input_vgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
bool remap_spi_ps_input)
{
unsigned spi_ps_input = args->shader_info->ps.spi_ps_input;
unsigned spi_ps_input = info->ps.spi_ps_input;
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
@ -403,7 +411,7 @@ declare_ps_input_vgprs(struct radv_shader_args *args)
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.sample_coverage);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* fixed pt */
if (args->options->remap_spi_ps_input) {
if (remap_spi_ps_input) {
/* LLVM optimizes away unused FS inputs and computes spi_ps_input_addr itself and then
* communicates the results back via the ELF binary. Mirror what LLVM does by re-mapping the
* VGPR arguments here.
@ -428,13 +436,14 @@ declare_ps_input_vgprs(struct radv_shader_args *args)
}
static void
declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
declare_ngg_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args,
bool has_api_gs)
{
if (has_api_gs) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_gs_state);
}
if (args->shader_info->has_ngg_culling) {
if (info->has_ngg_culling) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_culling_settings);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[0]);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ngg_viewport_scale[1]);
@ -444,22 +453,22 @@ declare_ngg_sgprs(struct radv_shader_args *args, bool has_api_gs)
}
static void
set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info *user_sgpr_info,
uint8_t *user_sgpr_idx)
set_global_input_locs(struct radv_shader_info *info, struct radv_shader_args *args,
const struct user_sgpr_info *user_sgpr_info, uint8_t *user_sgpr_idx)
{
unsigned num_inline_push_consts = 0;
if (!user_sgpr_info->indirect_all_descriptor_sets) {
for (unsigned i = 0; i < ARRAY_SIZE(args->descriptor_sets); i++) {
if (args->descriptor_sets[i].used)
set_loc_desc(args, i, user_sgpr_idx);
set_loc_desc(info, i, user_sgpr_idx);
}
} else {
set_loc_shader_ptr(args, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
set_loc_shader_ptr(info, AC_UD_INDIRECT_DESCRIPTOR_SETS, user_sgpr_idx);
}
if (args->ac.push_constants.used) {
set_loc_shader_ptr(args, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
set_loc_shader_ptr(info, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
}
for (unsigned i = 0; i < ARRAY_SIZE(args->ac.inline_push_consts); i++) {
@ -468,31 +477,31 @@ set_global_input_locs(struct radv_shader_args *args, const struct user_sgpr_info
}
if (num_inline_push_consts) {
set_loc_shader(args, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, num_inline_push_consts);
set_loc_shader(info, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx, num_inline_push_consts);
}
if (args->streamout_buffers.used) {
set_loc_shader_ptr(args, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
set_loc_shader_ptr(info, AC_UD_STREAMOUT_BUFFERS, user_sgpr_idx);
}
}
static void
set_vs_specific_input_locs(struct radv_shader_args *args, gl_shader_stage stage,
bool has_previous_stage, gl_shader_stage previous_stage,
uint8_t *user_sgpr_idx)
set_vs_specific_input_locs(struct radv_shader_info *info, struct radv_shader_args *args,
gl_shader_stage stage, bool has_previous_stage,
gl_shader_stage previous_stage, uint8_t *user_sgpr_idx)
{
if (args->prolog_inputs.used)
set_loc_shader(args, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2);
set_loc_shader(info, AC_UD_VS_PROLOG_INPUTS, user_sgpr_idx, 2);
if (!args->is_gs_copy_shader && (stage == MESA_SHADER_VERTEX ||
(has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
if (args->ac.vertex_buffers.used) {
set_loc_shader_ptr(args, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
set_loc_shader_ptr(info, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx);
}
unsigned vs_num = args->ac.base_vertex.used + args->ac.draw_id.used +
args->ac.start_instance.used;
set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
set_loc_shader(info, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
}
}
@ -504,15 +513,17 @@ is_pre_gs_stage(gl_shader_stage stage)
}
void
radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
bool has_previous_stage, gl_shader_stage previous_stage)
radv_declare_shader_args(const struct radv_nir_compiler_options *options,
struct radv_shader_info *info, gl_shader_stage stage,
bool has_previous_stage, gl_shader_stage previous_stage,
struct radv_shader_args *args)
{
struct user_sgpr_info user_sgpr_info;
bool needs_view_index = needs_view_index_sgpr(args, stage);
bool needs_view_index = needs_view_index_sgpr(options, info, stage);
bool has_api_gs = stage == MESA_SHADER_GEOMETRY;
if (args->options->chip_class >= GFX10) {
if (is_pre_gs_stage(stage) && args->shader_info->is_ngg) {
if (options->chip_class >= GFX10) {
if (is_pre_gs_stage(stage) && info->is_ngg) {
/* On GFX10, VS is merged into GS for NGG. */
previous_stage = stage;
stage = MESA_SHADER_GEOMETRY;
@ -521,14 +532,14 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
}
for (int i = 0; i < MAX_SETS; i++)
args->shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
for (int i = 0; i < AC_UD_MAX_UD; i++)
args->shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
allocate_user_sgprs(args, stage, has_previous_stage, previous_stage, needs_view_index,
has_api_gs, &user_sgpr_info);
allocate_user_sgprs(options, info, stage, has_previous_stage, previous_stage, needs_view_index,
has_api_gs, args->is_gs_copy_shader, &user_sgpr_info);
if (args->options->explicit_scratch_args) {
if (options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR, &args->ring_offsets);
}
@ -538,31 +549,31 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
switch (stage) {
case MESA_SHADER_COMPUTE:
declare_global_input_sgprs(args, &user_sgpr_info);
declare_global_input_sgprs(info, &user_sgpr_info, args);
if (args->shader_info->cs.uses_sbt) {
if (info->cs.uses_sbt) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_CONST_DESC_PTR, &args->ac.sbt_descriptors);
}
if (args->shader_info->cs.uses_grid_size) {
if (info->cs.uses_grid_size) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
}
if (args->shader_info->cs.uses_ray_launch_size) {
if (info->cs.uses_ray_launch_size) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.ray_launch_size);
}
for (int i = 0; i < 3; i++) {
if (args->shader_info->cs.uses_block_id[i]) {
if (info->cs.uses_block_id[i]) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.workgroup_ids[i]);
}
}
if (args->shader_info->cs.uses_local_invocation_idx) {
if (info->cs.uses_local_invocation_idx) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tg_size);
}
if (args->options->explicit_scratch_args) {
if (options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
@ -570,29 +581,29 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
break;
case MESA_SHADER_VERTEX:
/* NGG is handled by the GS case */
assert(!args->shader_info->is_ngg);
assert(!info->is_ngg);
declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
declare_global_input_sgprs(args, &user_sgpr_info);
declare_global_input_sgprs(info, &user_sgpr_info, args);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
}
if (args->shader_info->vs.as_es) {
if (info->vs.as_es) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
} else if (args->shader_info->vs.as_ls) {
} else if (info->vs.as_ls) {
/* no extra parameters */
} else {
declare_streamout_sgprs(args, stage);
declare_streamout_sgprs(info, args, stage);
}
if (args->options->explicit_scratch_args) {
if (options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
declare_vs_input_vgprs(args);
declare_vs_input_vgprs(options, info, args);
break;
case MESA_SHADER_TESS_CTRL:
if (has_previous_stage) {
@ -605,9 +616,9 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
declare_global_input_sgprs(args, &user_sgpr_info);
declare_global_input_sgprs(info, &user_sgpr_info, args);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
@ -616,9 +627,9 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_rel_ids);
declare_vs_input_vgprs(args);
declare_vs_input_vgprs(options, info, args);
} else {
declare_global_input_sgprs(args, &user_sgpr_info);
declare_global_input_sgprs(info, &user_sgpr_info, args);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
@ -626,7 +637,7 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tcs_factor_offset);
if (args->options->explicit_scratch_args) {
if (options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.tcs_patch_id);
@ -635,22 +646,22 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
break;
case MESA_SHADER_TESS_EVAL:
/* NGG is handled by the GS case */
assert(!args->shader_info->is_ngg);
assert(!info->is_ngg);
declare_global_input_sgprs(args, &user_sgpr_info);
declare_global_input_sgprs(info, &user_sgpr_info, args);
if (needs_view_index)
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
if (args->shader_info->tes.as_es) {
if (info->tes.as_es) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.es2gs_offset);
} else {
declare_streamout_sgprs(args, stage);
declare_streamout_sgprs(info, args, stage);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.tess_offchip_offset);
}
if (args->options->explicit_scratch_args) {
if (options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
declare_tes_input_vgprs(args);
@ -658,7 +669,7 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
case MESA_SHADER_GEOMETRY:
if (has_previous_stage) {
// First 6 system regs
if (args->shader_info->is_ngg) {
if (info->is_ngg) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_tg_info);
} else {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
@ -672,17 +683,17 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
if (previous_stage != MESA_SHADER_TESS_EVAL) {
declare_vs_specific_input_sgprs(args, stage, has_previous_stage, previous_stage);
declare_vs_specific_input_sgprs(info, args, stage, has_previous_stage, previous_stage);
}
declare_global_input_sgprs(args, &user_sgpr_info);
declare_global_input_sgprs(info, &user_sgpr_info, args);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
}
if (args->shader_info->is_ngg) {
declare_ngg_sgprs(args, has_api_gs);
if (info->is_ngg) {
declare_ngg_sgprs(info, args, has_api_gs);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
@ -692,12 +703,12 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[2]);
if (previous_stage == MESA_SHADER_VERTEX) {
declare_vs_input_vgprs(args);
declare_vs_input_vgprs(options, info, args);
} else {
declare_tes_input_vgprs(args);
}
} else {
declare_global_input_sgprs(args, &user_sgpr_info);
declare_global_input_sgprs(info, &user_sgpr_info, args);
if (needs_view_index) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.view_index);
@ -705,7 +716,7 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs2vs_offset);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.gs_wave_id);
if (args->options->explicit_scratch_args) {
if (options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT, &args->ac.gs_vtx_offset[0]);
@ -719,27 +730,27 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
}
break;
case MESA_SHADER_FRAGMENT:
declare_global_input_sgprs(args, &user_sgpr_info);
declare_global_input_sgprs(info, &user_sgpr_info, args);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
if (args->options->explicit_scratch_args) {
if (options->explicit_scratch_args) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.scratch_offset);
}
declare_ps_input_vgprs(args);
declare_ps_input_vgprs(info, args, options->remap_spi_ps_input);
break;
default:
unreachable("Shader stage not implemented");
}
args->shader_info->num_input_vgprs = 0;
args->shader_info->num_input_sgprs = 2;
args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
args->shader_info->num_input_vgprs = args->ac.num_vgprs_used;
info->num_input_vgprs = 0;
info->num_input_sgprs = 2;
info->num_input_sgprs += args->ac.num_sgprs_used;
info->num_input_vgprs = args->ac.num_vgprs_used;
uint8_t user_sgpr_idx = 0;
set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
set_loc_shader_ptr(info, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx);
/* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
* the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
@ -747,51 +758,51 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
user_sgpr_idx = 0;
if (stage == MESA_SHADER_VERTEX || (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))
set_vs_specific_input_locs(args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
set_vs_specific_input_locs(info, args, stage, has_previous_stage, previous_stage, &user_sgpr_idx);
set_global_input_locs(args, &user_sgpr_info, &user_sgpr_idx);
set_global_input_locs(info, args, &user_sgpr_info, &user_sgpr_idx);
switch (stage) {
case MESA_SHADER_COMPUTE:
if (args->ac.sbt_descriptors.used) {
set_loc_shader_ptr(args, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
set_loc_shader_ptr(info, AC_UD_CS_SBT_DESCRIPTORS, &user_sgpr_idx);
}
if (args->ac.num_work_groups.used) {
set_loc_shader(args, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3);
set_loc_shader(info, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, 3);
}
if (args->ac.ray_launch_size.used) {
set_loc_shader(args, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3);
set_loc_shader(info, AC_UD_CS_RAY_LAUNCH_SIZE, &user_sgpr_idx, 3);
}
break;
case MESA_SHADER_VERTEX:
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
set_loc_shader(info, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_TESS_CTRL:
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
set_loc_shader(info, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_TESS_EVAL:
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
set_loc_shader(info, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
break;
case MESA_SHADER_GEOMETRY:
if (args->ac.view_index.used)
set_loc_shader(args, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
set_loc_shader(info, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
if (args->ngg_gs_state.used) {
set_loc_shader(args, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
set_loc_shader(info, AC_UD_NGG_GS_STATE, &user_sgpr_idx, 1);
}
if (args->ngg_culling_settings.used) {
set_loc_shader(args, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1);
set_loc_shader(info, AC_UD_NGG_CULLING_SETTINGS, &user_sgpr_idx, 1);
}
if (args->ngg_viewport_scale[0].used) {
assert(args->ngg_viewport_scale[1].used &&
args->ngg_viewport_translate[0].used &&
args->ngg_viewport_translate[1].used);
set_loc_shader(args, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4);
set_loc_shader(info, AC_UD_NGG_VIEWPORT, &user_sgpr_idx, 4);
}
break;
case MESA_SHADER_FRAGMENT:
@ -800,5 +811,5 @@ radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
unreachable("Shader stage not implemented");
}
args->shader_info->num_user_sgprs = user_sgpr_idx;
info->num_user_sgprs = user_sgpr_idx;
}

View file

@ -30,8 +30,6 @@
struct radv_shader_args {
struct ac_shader_args ac;
struct radv_shader_info *shader_info;
const struct radv_nir_compiler_options *options;
struct ac_arg descriptor_sets[MAX_SETS];
struct ac_arg ring_offsets;
@ -58,5 +56,10 @@ radv_shader_args_from_ac(struct ac_shader_args *args)
return container_of(args, struct radv_shader_args, ac);
}
void radv_declare_shader_args(struct radv_shader_args *args, gl_shader_stage stage,
bool has_previous_stage, gl_shader_stage previous_stage);
struct radv_nir_compiler_options;
struct radv_shader_info;
void radv_declare_shader_args(const struct radv_nir_compiler_options *options,
struct radv_shader_info *info, gl_shader_stage stage,
bool has_previous_stage, gl_shader_stage previous_stage,
struct radv_shader_args *args);