aco: move info pointer to a copy.

This is just setup to move this to a different struct later.

Reviewed-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16342>
This commit is contained in:
Dave Airlie 2022-05-05 11:32:53 +10:00 committed by Marge Bot
parent 29fbc88d6b
commit a2701bfdb8
8 changed files with 66 additions and 66 deletions

View file

@ -770,7 +770,7 @@ insert_wait_states(Program* program)
std::stack<unsigned, std::vector<unsigned>> loop_header_indices;
unsigned loop_progress = 0;
if (program->stage.has(SWStage::VS) && program->info->vs.dynamic_inputs) {
if (program->stage.has(SWStage::VS) && program->info.vs.dynamic_inputs) {
for (Definition def : program->vs_inputs) {
update_counters(in_ctx[0], event_vmem);
insert_wait_entry(in_ctx[0], def, event_vmem);

View file

@ -5217,7 +5217,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
nir_src offset = *nir_get_io_offset_src(instr);
if (ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->program->info->vs.dynamic_inputs) {
if (ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->program->info.vs.dynamic_inputs) {
if (!nir_src_is_const(offset) || nir_src_as_uint(offset))
isel_err(offset.ssa->parent_instr,
"Unimplemented non-zero nir_intrinsic_load_input offset");
@ -5272,8 +5272,8 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr)
unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels);
unsigned desc_index =
ctx->program->info->vs.use_per_attribute_vb_descs ? location : attrib_binding;
desc_index = util_bitcount(ctx->program->info->vs.vb_desc_usage_mask &
ctx->program->info.vs.use_per_attribute_vb_descs ? location : attrib_binding;
desc_index = util_bitcount(ctx->program->info.vs.vb_desc_usage_mask &
u_bit_consecutive(0, desc_index));
Operand off = bld.copy(bld.def(s1), Operand::c32(desc_index * 16u));
Temp list = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), vertex_buffers, off);
@ -7383,12 +7383,12 @@ visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr)
bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer,
Operand::c32(RING_GSVS_GS * 16u));
unsigned num_components = ctx->program->info->gs.num_stream_output_components[stream];
unsigned num_components = ctx->program->info.gs.num_stream_output_components[stream];
unsigned stride = 4u * num_components * ctx->shader->info.gs.vertices_out;
unsigned stream_offset = 0;
for (unsigned i = 0; i < stream; i++) {
unsigned prev_stride = 4u * ctx->program->info->gs.num_stream_output_components[i] *
unsigned prev_stride = 4u * ctx->program->info.gs.num_stream_output_components[i] *
ctx->shader->info.gs.vertices_out;
stream_offset += prev_stride * ctx->program->wave_size;
}
@ -7421,11 +7421,11 @@ visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr)
unsigned offset = 0;
for (unsigned i = 0; i <= VARYING_SLOT_VAR31; i++) {
if (ctx->program->info->gs.output_streams[i] != stream)
if (ctx->program->info.gs.output_streams[i] != stream)
continue;
for (unsigned j = 0; j < 4; j++) {
if (!(ctx->program->info->gs.output_usage_mask[i] & (1 << j)))
if (!(ctx->program->info.gs.output_usage_mask[i] & (1 << j)))
continue;
if (ctx->outputs.mask[i] & (1 << j)) {
@ -10484,10 +10484,10 @@ export_vs_varying(isel_context* ctx, int slot, bool is_pos, int* next_pos)
assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG);
const uint8_t *vs_output_param_offset =
ctx->stage.has(SWStage::GS) ? ctx->program->info->vs.outinfo.vs_output_param_offset :
ctx->stage.has(SWStage::TES) ? ctx->program->info->tes.outinfo.vs_output_param_offset :
ctx->stage.has(SWStage::MS) ? ctx->program->info->ms.outinfo.vs_output_param_offset :
ctx->program->info->vs.outinfo.vs_output_param_offset;
ctx->stage.has(SWStage::GS) ? ctx->program->info.vs.outinfo.vs_output_param_offset :
ctx->stage.has(SWStage::TES) ? ctx->program->info.tes.outinfo.vs_output_param_offset :
ctx->stage.has(SWStage::MS) ? ctx->program->info.ms.outinfo.vs_output_param_offset :
ctx->program->info.vs.outinfo.vs_output_param_offset;
assert(vs_output_param_offset);
@ -10569,10 +10569,10 @@ create_vs_exports(isel_context* ctx)
{
assert(ctx->stage.hw == HWStage::VS || ctx->stage.hw == HWStage::NGG);
const radv_vs_output_info* outinfo =
ctx->stage.has(SWStage::GS) ? &ctx->program->info->vs.outinfo :
ctx->stage.has(SWStage::TES) ? &ctx->program->info->tes.outinfo :
ctx->stage.has(SWStage::MS) ? &ctx->program->info->ms.outinfo :
&ctx->program->info->vs.outinfo;
ctx->stage.has(SWStage::GS) ? &ctx->program->info.vs.outinfo :
ctx->stage.has(SWStage::TES) ? &ctx->program->info.tes.outinfo :
ctx->stage.has(SWStage::MS) ? &ctx->program->info.ms.outinfo :
&ctx->program->info.vs.outinfo;
assert(outinfo);
ctx->block->kind |= block_kind_export_end;
@ -10628,10 +10628,10 @@ create_primitive_exports(isel_context *ctx, Temp prim_ch1)
{
assert(ctx->stage.hw == HWStage::NGG);
const radv_vs_output_info* outinfo =
ctx->stage.has(SWStage::GS) ? &ctx->program->info->vs.outinfo :
ctx->stage.has(SWStage::TES) ? &ctx->program->info->tes.outinfo :
ctx->stage.has(SWStage::MS) ? &ctx->program->info->ms.outinfo :
&ctx->program->info->vs.outinfo;
ctx->stage.has(SWStage::GS) ? &ctx->program->info.vs.outinfo :
ctx->stage.has(SWStage::TES) ? &ctx->program->info.tes.outinfo :
ctx->stage.has(SWStage::MS) ? &ctx->program->info.ms.outinfo :
&ctx->program->info.vs.outinfo;
Builder bld(ctx->program, ctx->block);
@ -10699,34 +10699,34 @@ export_fs_mrt_z(isel_context* ctx)
}
/* Both stencil and sample mask only need 16-bits. */
if (!ctx->program->info->ps.writes_z &&
(ctx->program->info->ps.writes_stencil || ctx->program->info->ps.writes_sample_mask)) {
if (!ctx->program->info.ps.writes_z &&
(ctx->program->info.ps.writes_stencil || ctx->program->info.ps.writes_sample_mask)) {
compr = true; /* COMPR flag */
if (ctx->program->info->ps.writes_stencil) {
if (ctx->program->info.ps.writes_stencil) {
/* Stencil should be in X[23:16]. */
values[0] = Operand(ctx->outputs.temps[FRAG_RESULT_STENCIL * 4u]);
values[0] = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u), values[0]);
enabled_channels |= 0x3;
}
if (ctx->program->info->ps.writes_sample_mask) {
if (ctx->program->info.ps.writes_sample_mask) {
/* SampleMask should be in Y[15:0]. */
values[1] = Operand(ctx->outputs.temps[FRAG_RESULT_SAMPLE_MASK * 4u]);
enabled_channels |= 0xc;
}
} else {
if (ctx->program->info->ps.writes_z) {
if (ctx->program->info.ps.writes_z) {
values[0] = Operand(ctx->outputs.temps[FRAG_RESULT_DEPTH * 4u]);
enabled_channels |= 0x1;
}
if (ctx->program->info->ps.writes_stencil) {
if (ctx->program->info.ps.writes_stencil) {
values[1] = Operand(ctx->outputs.temps[FRAG_RESULT_STENCIL * 4u]);
enabled_channels |= 0x2;
}
if (ctx->program->info->ps.writes_sample_mask) {
if (ctx->program->info.ps.writes_sample_mask) {
values[2] = Operand(ctx->outputs.temps[FRAG_RESULT_SAMPLE_MASK * 4u]);
enabled_channels |= 0x4;
}
@ -10922,7 +10922,7 @@ emit_streamout(isel_context* ctx, unsigned stream)
Temp buf_ptr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->streamout_buffers));
for (unsigned i = 0; i < 4; i++) {
unsigned stride = ctx->program->info->so.strides[i];
unsigned stride = ctx->program->info.so.strides[i];
if (!stride)
continue;
@ -10945,8 +10945,8 @@ emit_streamout(isel_context* ctx, unsigned stream)
}
}
for (unsigned i = 0; i < ctx->program->info->so.num_outputs; i++) {
const struct radv_stream_output* output = &ctx->program->info->so.outputs[i];
for (unsigned i = 0; i < ctx->program->info.so.num_outputs; i++) {
const struct radv_stream_output* output = &ctx->program->info.so.outputs[i];
if (stream != output->stream)
continue;
@ -11005,8 +11005,8 @@ add_startpgm(struct isel_context* ctx)
ctx->program->private_segment_buffer = get_arg(ctx, ctx->args->ring_offsets);
ctx->program->scratch_offset = get_arg(ctx, ctx->args->ac.scratch_offset);
if (ctx->stage.has(SWStage::VS) && ctx->program->info->vs.dynamic_inputs) {
unsigned num_attributes = util_last_bit(ctx->program->info->vs.vb_desc_usage_mask);
if (ctx->stage.has(SWStage::VS) && ctx->program->info.vs.dynamic_inputs) {
unsigned num_attributes = util_last_bit(ctx->program->info.vs.vb_desc_usage_mask);
for (unsigned i = 0; i < num_attributes; i++) {
Definition def(get_arg(ctx, ctx->args->vs_inputs[i]));
@ -11234,7 +11234,7 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt)
Temp prm_cnt_0;
if (ctx->program->chip_class == GFX10 &&
(ctx->stage.has(SWStage::GS) || ctx->program->info->has_ngg_culling)) {
(ctx->stage.has(SWStage::GS) || ctx->program->info.has_ngg_culling)) {
/* Navi 1x workaround: check whether the workgroup has no output.
* If so, change the number of exported vertices and primitives to 1.
*/
@ -11378,7 +11378,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
visit_cf_list(&ctx, &func->body);
if (ctx.program->info->so.num_outputs && ctx.stage.hw == HWStage::VS)
if (ctx.program->info.so.num_outputs && ctx.stage.hw == HWStage::VS)
emit_streamout(&ctx, 0);
if (ctx.stage.hw == HWStage::VS) {
@ -11438,7 +11438,7 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
program->private_segment_buffer, Operand::c32(RING_GSVS_VS * 16u));
Operand stream_id = Operand::zero();
if (program->info->so.num_outputs)
if (program->info.so.num_outputs)
stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
get_arg(&ctx, ctx.args->ac.streamout_config), Operand::c32(0x20018u));
@ -11451,8 +11451,8 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
if (stream_id.isConstant() && stream != stream_id.constantValue())
continue;
unsigned num_components = program->info->gs.num_stream_output_components[stream];
if (stream > 0 && (!num_components || !program->info->so.num_outputs))
unsigned num_components = program->info.gs.num_stream_output_components[stream];
if (stream > 0 && (!num_components || !program->info.so.num_outputs))
continue;
memset(ctx.outputs.mask, 0, sizeof(ctx.outputs.mask));
@ -11467,17 +11467,17 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
unsigned offset = 0;
for (unsigned i = 0; i <= VARYING_SLOT_VAR31; ++i) {
if (program->info->gs.output_streams[i] != stream)
if (program->info.gs.output_streams[i] != stream)
continue;
unsigned output_usage_mask = program->info->gs.output_usage_mask[i];
unsigned output_usage_mask = program->info.gs.output_usage_mask[i];
unsigned length = util_last_bit(output_usage_mask);
for (unsigned j = 0; j < length; ++j) {
if (!(output_usage_mask & (1 << j)))
continue;
Temp val = bld.tmp(v1);
unsigned const_offset = offset * program->info->gs.vertices_out * 16 * 4;
unsigned const_offset = offset * program->info.gs.vertices_out * 16 * 4;
load_vmem_mubuf(&ctx, val, gsvs_ring, vtx_offset, Temp(), const_offset, 4, 1, 0u, true,
true, true);
@ -11488,7 +11488,7 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_
}
}
if (program->info->so.num_outputs) {
if (program->info.so.num_outputs) {
emit_streamout(&ctx, stream);
bld.reset(ctx.block);
}

View file

@ -269,11 +269,11 @@ void
setup_vs_variables(isel_context* ctx, nir_shader* nir)
{
if (ctx->stage == vertex_vs || ctx->stage == vertex_ngg) {
setup_vs_output_info(ctx, nir, &ctx->program->info->vs.outinfo);
setup_vs_output_info(ctx, nir, &ctx->program->info.vs.outinfo);
/* TODO: NGG streamout */
if (ctx->stage.hw == HWStage::NGG)
assert(!ctx->program->info->so.num_outputs);
assert(!ctx->program->info.so.num_outputs);
}
if (ctx->stage == vertex_ngg) {
@ -289,9 +289,9 @@ setup_gs_variables(isel_context* ctx, nir_shader* nir)
{
if (ctx->stage == vertex_geometry_gs || ctx->stage == tess_eval_geometry_gs) {
ctx->program->config->lds_size =
ctx->program->info->gs_ring_info.lds_size; /* Already in units of the alloc granularity */
ctx->program->info.gs_ring_info.lds_size; /* Already in units of the alloc granularity */
} else if (ctx->stage == vertex_geometry_ngg || ctx->stage == tess_eval_geometry_ngg) {
setup_vs_output_info(ctx, nir, &ctx->program->info->vs.outinfo);
setup_vs_output_info(ctx, nir, &ctx->program->info.vs.outinfo);
ctx->program->config->lds_size =
DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule);
@ -301,23 +301,23 @@ setup_gs_variables(isel_context* ctx, nir_shader* nir)
void
setup_tcs_info(isel_context* ctx, nir_shader* nir, nir_shader* vs)
{
ctx->tcs_in_out_eq = ctx->program->info->vs.tcs_in_out_eq;
ctx->tcs_temp_only_inputs = ctx->program->info->vs.tcs_temp_only_input_mask;
ctx->tcs_num_patches = ctx->program->info->num_tess_patches;
ctx->program->config->lds_size = ctx->program->info->tcs.num_lds_blocks;
ctx->tcs_in_out_eq = ctx->program->info.vs.tcs_in_out_eq;
ctx->tcs_temp_only_inputs = ctx->program->info.vs.tcs_temp_only_input_mask;
ctx->tcs_num_patches = ctx->program->info.num_tess_patches;
ctx->program->config->lds_size = ctx->program->info.tcs.num_lds_blocks;
}
void
setup_tes_variables(isel_context* ctx, nir_shader* nir)
{
ctx->tcs_num_patches = ctx->program->info->num_tess_patches;
ctx->tcs_num_patches = ctx->program->info.num_tess_patches;
if (ctx->stage == tess_eval_vs || ctx->stage == tess_eval_ngg) {
setup_vs_output_info(ctx, nir, &ctx->program->info->tes.outinfo);
setup_vs_output_info(ctx, nir, &ctx->program->info.tes.outinfo);
/* TODO: NGG streamout */
if (ctx->stage.hw == HWStage::NGG)
assert(!ctx->program->info->so.num_outputs);
assert(!ctx->program->info.so.num_outputs);
}
if (ctx->stage == tess_eval_ngg) {
@ -331,7 +331,7 @@ setup_tes_variables(isel_context* ctx, nir_shader* nir)
void
setup_ms_variables(isel_context* ctx, nir_shader* nir)
{
setup_vs_output_info(ctx, nir, &ctx->program->info->ms.outinfo);
setup_vs_output_info(ctx, nir, &ctx->program->info.ms.outinfo);
ctx->program->config->lds_size =
DIV_ROUND_UP(nir->info.shared_size, ctx->program->dev.lds_encoding_granule);
@ -403,9 +403,9 @@ init_context(isel_context* ctx, nir_shader* shader)
ctx->range_ht = _mesa_pointer_hash_table_create(NULL);
ctx->ub_config.min_subgroup_size = 64;
ctx->ub_config.max_subgroup_size = 64;
if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && ctx->program->info->cs.subgroup_size) {
ctx->ub_config.min_subgroup_size = ctx->program->info->cs.subgroup_size;
ctx->ub_config.max_subgroup_size = ctx->program->info->cs.subgroup_size;
if (ctx->shader->info.stage == MESA_SHADER_COMPUTE && ctx->program->info.cs.subgroup_size) {
ctx->ub_config.min_subgroup_size = ctx->program->info.cs.subgroup_size;
ctx->ub_config.max_subgroup_size = ctx->program->info.cs.subgroup_size;
}
ctx->ub_config.max_workgroup_invocations = 2048;
ctx->ub_config.max_workgroup_count[0] = 65535;
@ -821,8 +821,8 @@ init_context(isel_context* ctx, nir_shader* shader)
}
}
ctx->program->config->spi_ps_input_ena = ctx->program->info->ps.spi_ps_input;
ctx->program->config->spi_ps_input_addr = ctx->program->info->ps.spi_ps_input;
ctx->program->config->spi_ps_input_ena = ctx->program->info.ps.spi_ps_input;
ctx->program->config->spi_ps_input_addr = ctx->program->info.ps.spi_ps_input;
ctx->cf_info.nir_to_aco = std::move(nir_to_aco);
@ -916,7 +916,7 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
ctx.options = options;
ctx.stage = program->stage;
program->workgroup_size = program->info->workgroup_size;
program->workgroup_size = program->info.workgroup_size;
assert(program->workgroup_size);
/* Mesh shading only works on GFX10.3+. */
@ -933,7 +933,7 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
unsigned scratch_size = 0;
if (program->stage == gs_copy_vs) {
assert(shader_count == 1);
setup_vs_output_info(&ctx, shaders[0], &program->info->vs.outinfo);
setup_vs_output_info(&ctx, shaders[0], &program->info.vs.outinfo);
} else {
for (unsigned i = 0; i < shader_count; i++) {
nir_shader* nir = shaders[i];

View file

@ -71,7 +71,7 @@ init_program(Program* program, Stage stage, const struct radv_shader_info* info,
{
program->stage = stage;
program->config = config;
program->info = info;
program->info = *info;
program->chip_class = chip_class;
if (family == CHIP_UNKNOWN) {
switch (chip_class) {

View file

@ -2053,7 +2053,7 @@ public:
std::vector<RegClass> temp_rc = {s1};
RegisterDemand max_reg_demand = RegisterDemand();
ac_shader_config* config;
const struct radv_shader_info* info;
struct radv_shader_info info;
enum chip_class chip_class;
enum radeon_family family;
DeviceInfo dev;

View file

@ -382,7 +382,7 @@ max_suitable_waves(Program* program, uint16_t waves)
* These limit occupancy the same way as other stages' LDS usage does.
*/
unsigned lds_bytes_per_interp = 3 * 16;
unsigned lds_param_bytes = lds_bytes_per_interp * program->info->ps.num_interp;
unsigned lds_param_bytes = lds_bytes_per_interp * program->info.ps.num_interp;
lds_per_workgroup += align(lds_param_bytes, program->dev.lds_alloc_granule);
}
unsigned lds_limit = program->wgp_mode ? program->dev.lds_limit * 2 : program->dev.lds_limit;

View file

@ -1083,8 +1083,8 @@ schedule_program(Program* program, live& live_vars)
* Schedule less aggressively when early primitive export is used, and
* keep the position export at the very bottom when late primitive export is used.
*/
if (program->info->has_ngg_culling && program->stage.num_sw_stages() == 1) {
if (!program->info->has_ngg_early_prim_export)
if (program->info.has_ngg_culling && program->stage.num_sw_stages() == 1) {
if (!program->info.has_ngg_early_prim_export)
ctx.schedule_pos_exports = false;
else
ctx.schedule_pos_export_div = 4;

View file

@ -473,7 +473,7 @@ collect_preasm_stats(Program* program)
double usage[(int)BlockCycleEstimator::resource_count] = {0};
std::vector<BlockCycleEstimator> blocks(program->blocks.size(), program);
if (program->stage.has(SWStage::VS) && program->info->vs.has_prolog) {
if (program->stage.has(SWStage::VS) && program->info.vs.has_prolog) {
unsigned vs_input_latency = 320;
for (Definition def : program->vs_inputs) {
blocks[0].vm.push_back(vs_input_latency);