radv: Remove first_task and ib_addr/ib_stride.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22139>
This commit is contained in:
Timur Kristóf 2023-03-27 16:23:53 +02:00 committed by Marge Bot
parent 945384b504
commit 8a426be553
6 changed files with 22 additions and 115 deletions

View file

@@ -7896,9 +7896,8 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
cmd_buffer->state.last_drawid = -1;
cmd_buffer->state.last_vertex_offset = -1;
/* Note: firstTask/firstVertex is not supported by this draw packet. */
uint32_t xyz_dim_reg = (base_reg + 4 - SI_SH_REG_OFFSET) >> 2;
uint32_t draw_id_reg = (base_reg + 16 - SI_SH_REG_OFFSET) >> 2;
uint32_t xyz_dim_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
uint32_t draw_id_reg = (base_reg + 12 - SI_SH_REG_OFFSET) >> 2;
uint32_t draw_id_enable = !!cmd_buffer->state.graphics_pipeline->uses_drawid;
uint32_t xyz_dim_enable = 1; /* TODO: disable XYZ_DIM when unneeded */
@@ -8014,7 +8013,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer)
assert(ring_entry_loc->sgpr_idx != -1);
uint32_t base_reg = cmd_buffer->state.graphics_pipeline->vtx_base_sgpr;
uint32_t xyz_dim_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;
uint32_t xyz_dim_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
uint32_t ring_entry_reg = ((base_reg + ring_entry_loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2;
uint32_t xyz_dim_en = 1; /* TODO: disable XYZ_DIM when unneeded */
uint32_t mode1_en = 1; /* legacy fast launch mode */
@@ -8080,15 +8079,13 @@ radv_emit_userdata_vertex_drawid(struct radv_cmd_buffer *cmd_buffer, uint32_t ve
ALWAYS_INLINE static void
radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer,
const uint32_t x, const uint32_t y, const uint32_t z,
const uint32_t first_task)
const uint32_t x, const uint32_t y, const uint32_t z)
{
struct radv_cmd_state *state = &cmd_buffer->state;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
const bool uses_drawid = state->graphics_pipeline->uses_drawid;
radeon_set_sh_reg_seq(cs, state->graphics_pipeline->vtx_base_sgpr, state->graphics_pipeline->vtx_emit_num);
radeon_emit(cs, first_task);
radeon_emit(cs, x);
radeon_emit(cs, y);
radeon_emit(cs, z);
@@ -8100,45 +8097,22 @@ radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer,
}
ALWAYS_INLINE static void
radv_emit_userdata_mesh_first_task_0_draw_id_0(struct radv_cmd_buffer *cmd_buffer)
radv_emit_userdata_mesh_draw_id_0(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_cmd_state *state = &cmd_buffer->state;
struct radeon_cmdbuf *cs = cmd_buffer->cs;
struct radv_graphics_pipeline *pipeline = state->graphics_pipeline;
const bool uses_drawid = pipeline->uses_drawid;
radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr, 1);
radeon_emit(cs, 0);
if (uses_drawid) {
radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr + (pipeline->vtx_emit_num - 1) * 4, 1);
radeon_emit(cs, 0);
}
}
ALWAYS_INLINE static void
radv_emit_userdata_task_ib_only(struct radv_cmd_buffer *cmd_buffer, uint64_t ib_va,
uint32_t ib_stride)
{
struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
const struct radv_userdata_info *task_ib_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_IB);
if (task_ib_loc->sgpr_idx != -1) {
assert(task_ib_loc->num_sgprs == 3);
unsigned task_ib_reg = R_00B900_COMPUTE_USER_DATA_0 + task_ib_loc->sgpr_idx * 4;
radeon_set_sh_reg_seq(cs, task_ib_reg, 3);
radeon_emit(cs, ib_va);
radeon_emit(cs, ib_va >> 32);
radeon_emit(cs, ib_stride);
}
}
ALWAYS_INLINE static void
radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z,
uint32_t draw_id, uint32_t first_task, uint64_t ib_va)
uint32_t draw_id)
{
struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
@@ -8164,8 +8138,6 @@ radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t
radeon_set_sh_reg_seq(cs, draw_id_reg, 1);
radeon_emit(cs, draw_id);
}
radv_emit_userdata_task_ib_only(cmd_buffer, ib_va, first_task ? 8 : 0);
}
/* Bind an internal index buffer for GPUs that hang with 0-sized index buffers to handle robustness2
@@ -8361,13 +8333,12 @@ radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct r
ALWAYS_INLINE static void
radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer,
uint32_t x, uint32_t y, uint32_t z,
uint32_t first_task)
uint32_t x, uint32_t y, uint32_t z)
{
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
const uint32_t count = x * y * z;
radv_emit_userdata_mesh(cmd_buffer, x, y, z, first_task);
radv_emit_userdata_mesh(cmd_buffer, x, y, z);
if (!view_mask) {
radv_cs_emit_draw_packet(cmd_buffer, count, 0);
@@ -8404,7 +8375,7 @@ radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
radv_emit_userdata_mesh_draw_id_0(cmd_buffer);
if (!state->render.view_mask) {
radv_cs_emit_indirect_mesh_draw_packet(cmd_buffer, info->count, count_va, info->stride);
@@ -8418,23 +8389,14 @@ radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
ALWAYS_INLINE static void
radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
uint32_t z, uint32_t first_task)
uint32_t z)
{
uint64_t fake_ib_va = 0;
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
const unsigned num_views = MAX2(1, util_bitcount(view_mask));
unsigned ace_predication_size = num_views * 6; /* DISPATCH_TASKMESH_DIRECT_ACE size */
if (first_task) {
/* Pass this as the IB to the shader for emulating firstTask in task shaders. */
uint32_t fake_ib_dwords[2] = {x, first_task};
unsigned fake_ib_offset;
radv_cmd_buffer_upload_data(cmd_buffer, 8, fake_ib_dwords, &fake_ib_offset);
fake_ib_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + fake_ib_offset;
}
radv_emit_userdata_task(cmd_buffer, x, y, z, 0, first_task, fake_ib_va);
radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
radv_emit_userdata_task(cmd_buffer, x, y, z, 0);
radv_emit_userdata_mesh_draw_id_0(cmd_buffer);
radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs,
cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
ace_predication_size);
@@ -8453,8 +8415,7 @@ radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint3
static void
radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
const struct radv_draw_info *info, uint64_t nv_ib_va,
uint32_t nv_ib_stride)
const struct radv_draw_info *info)
{
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
struct radeon_winsys *ws = cmd_buffer->device->ws;
@@ -8510,8 +8471,7 @@ radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
}
radv_cs_add_buffer(ws, cmd_buffer->ace_internal.cs, info->indirect->bo);
radv_emit_userdata_task_ib_only(cmd_buffer, nv_ib_va, nv_ib_stride);
radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
radv_emit_userdata_mesh_draw_id_0(cmd_buffer);
radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs,
cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
ace_predication_size);
@@ -9274,9 +9234,9 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y,
return;
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
radv_emit_direct_taskmesh_draw_packets(cmd_buffer, x, y, z, 0);
radv_emit_direct_taskmesh_draw_packets(cmd_buffer, x, y, z);
} else {
radv_emit_direct_mesh_draw_packet(cmd_buffer, x, y, z, 0);
radv_emit_direct_mesh_draw_packet(cmd_buffer, x, y, z);
}
radv_after_draw(cmd_buffer);
@@ -9307,7 +9267,7 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer
return;
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, 0, 0);
radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info);
} else {
radv_emit_indirect_mesh_draw_packets(cmd_buffer, &info);
}
@@ -9342,7 +9302,7 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b
return;
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, 0, 0);
radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info);
} else {
radv_emit_indirect_mesh_draw_packets(cmd_buffer, &info);
}

View file

@@ -273,10 +273,10 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
break;
case nir_intrinsic_load_task_ib_addr:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_addr);
replacement = nir_imm_zero(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
break;
case nir_intrinsic_load_task_ib_stride:
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_stride);
replacement = nir_imm_zero(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
break;
case nir_intrinsic_load_lshs_vertex_stride_amd: {
unsigned io_num = stage == MESA_SHADER_VERTEX ?

View file

@@ -594,39 +594,6 @@ radv_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage *fs_s
return progress;
}
/* Emulates NV_mesh_shader first_task using first_vertex. */
static bool
radv_lower_ms_workgroup_id(nir_shader *nir)
{
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
bool progress = false;
nir_builder b;
nir_builder_init(&b, impl);
nir_foreach_block(block, impl) {
nir_foreach_instr_safe(instr, block) {
if (instr->type != nir_instr_type_intrinsic)
continue;
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_load_workgroup_id)
continue;
progress = true;
b.cursor = nir_after_instr(instr);
nir_ssa_def *x = nir_channel(&b, &intrin->dest.ssa, 0);
nir_ssa_def *x_full = nir_iadd(&b, x, nir_load_first_vertex(&b));
nir_ssa_def *v = nir_vector_insert_imm(&b, &intrin->dest.ssa, x_full, 0);
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, v, v->parent_instr);
}
}
nir_metadata preserved =
progress ? (nir_metadata_block_index | nir_metadata_dominance) : nir_metadata_all;
nir_metadata_preserve(impl, preserved);
return progress;
}
static bool
is_sincos(const nir_instr *instr, const void *_)
{
@@ -889,9 +856,6 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_
NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
if (nir->info.stage == MESA_SHADER_MESH) {
/* NV_mesh_shader: include first_task (aka. first_vertex) in workgroup ID. */
NIR_PASS(_, nir, radv_lower_ms_workgroup_id);
/* Mesh shaders only have a 1D "vertex index" which we use
* as "workgroup index" to emulate the 3D workgroup ID.
*/
@@ -1204,7 +1168,6 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
device->physical_device->rad_info.gfx_level, false);
return true;
} else if (nir->info.stage == MESA_SHADER_TASK) {
ac_nir_apply_first_task_to_task_shader(nir);
ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
device->physical_device->task_info.num_entries);
return true;

View file

@@ -108,7 +108,7 @@ count_tes_user_sgprs(const struct radv_pipeline_key *key)
static uint8_t
count_ms_user_sgprs(const struct radv_shader_info *info)
{
uint8_t count = 1 + 3; /* firstTask + num_work_groups[3] */
uint8_t count = 3; /* num_work_groups[3] */
if (info->vs.needs_draw_id)
count++;
@@ -415,7 +415,6 @@ declare_tes_input_vgprs(struct radv_shader_args *args)
static void
declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
{
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
if (info->vs.needs_draw_id) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
@@ -553,7 +552,7 @@ static void
set_ms_input_locs(struct radv_shader_args *args, uint8_t *user_sgpr_idx)
{
unsigned vs_num =
args->ac.base_vertex.used + 3 * args->ac.num_work_groups.used + args->ac.draw_id.used;
3 * args->ac.num_work_groups.used + args->ac.draw_id.used;
set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
if (args->ac.task_ring_entry.used)
@@ -664,8 +663,6 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_pip
if (stage == MESA_SHADER_TASK) {
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->task_ib_addr);
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->task_ib_stride);
}
for (int i = 0; i < 3; i++) {
@@ -947,10 +944,6 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_pip
if (args->ac.task_ring_entry.used) {
set_loc_shader(args, AC_UD_TASK_RING_ENTRY, &user_sgpr_idx, 1);
}
if (args->task_ib_addr.used) {
assert(args->task_ib_stride.used);
set_loc_shader(args, AC_UD_CS_TASK_IB, &user_sgpr_idx, 3);
}
break;
case MESA_SHADER_VERTEX:
if (args->ac.view_index.used)

View file

@@ -51,10 +51,6 @@ struct radv_shader_args {
struct ac_arg ngg_viewport_scale[2];
struct ac_arg ngg_viewport_translate[2];
/* Task shaders */
struct ac_arg task_ib_addr;
struct ac_arg task_ib_stride;
/* Fragment shaders */
struct ac_arg ps_epilog_pc;
struct ac_arg ps_num_samples;

View file

@@ -514,7 +514,6 @@ gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info)
* - drawing 1 input vertex ~ launching 1 mesh shader workgroup
*
* In the shader:
* - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
* - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
*
* Notes:
@@ -711,10 +710,6 @@ gather_shader_info_task(const nir_shader *nir, struct radv_shader_info *info)
* use them.
*/
/* Needed to address the IB to read firstTask in NV_mesh_shader. */
info->vs.needs_draw_id |=
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID);
/* Needed to address the task draw/payload rings. */
info->cs.uses_block_id[0] = true;
info->cs.uses_block_id[1] = true;