mirror of
https://gitlab.freedesktop.org/mesa/mesa.git
synced 2026-05-05 13:58:04 +02:00
radv: Remove first_task and ib_addr/ib_stride.
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22139>
This commit is contained in:
parent
945384b504
commit
8a426be553
6 changed files with 22 additions and 115 deletions
|
|
@ -7896,9 +7896,8 @@ radv_cs_emit_indirect_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint3
|
|||
cmd_buffer->state.last_drawid = -1;
|
||||
cmd_buffer->state.last_vertex_offset = -1;
|
||||
|
||||
/* Note: firstTask/firstVertex is not supported by this draw packet. */
|
||||
uint32_t xyz_dim_reg = (base_reg + 4 - SI_SH_REG_OFFSET) >> 2;
|
||||
uint32_t draw_id_reg = (base_reg + 16 - SI_SH_REG_OFFSET) >> 2;
|
||||
uint32_t xyz_dim_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
|
||||
uint32_t draw_id_reg = (base_reg + 12 - SI_SH_REG_OFFSET) >> 2;
|
||||
|
||||
uint32_t draw_id_enable = !!cmd_buffer->state.graphics_pipeline->uses_drawid;
|
||||
uint32_t xyz_dim_enable = 1; /* TODO: disable XYZ_DIM when unneeded */
|
||||
|
|
@ -8014,7 +8013,7 @@ radv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer)
|
|||
assert(ring_entry_loc->sgpr_idx != -1);
|
||||
|
||||
uint32_t base_reg = cmd_buffer->state.graphics_pipeline->vtx_base_sgpr;
|
||||
uint32_t xyz_dim_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;
|
||||
uint32_t xyz_dim_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
|
||||
uint32_t ring_entry_reg = ((base_reg + ring_entry_loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2;
|
||||
uint32_t xyz_dim_en = 1; /* TODO: disable XYZ_DIM when unneeded */
|
||||
uint32_t mode1_en = 1; /* legacy fast launch mode */
|
||||
|
|
@ -8080,15 +8079,13 @@ radv_emit_userdata_vertex_drawid(struct radv_cmd_buffer *cmd_buffer, uint32_t ve
|
|||
|
||||
ALWAYS_INLINE static void
|
||||
radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer,
|
||||
const uint32_t x, const uint32_t y, const uint32_t z,
|
||||
const uint32_t first_task)
|
||||
const uint32_t x, const uint32_t y, const uint32_t z)
|
||||
{
|
||||
struct radv_cmd_state *state = &cmd_buffer->state;
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
const bool uses_drawid = state->graphics_pipeline->uses_drawid;
|
||||
|
||||
radeon_set_sh_reg_seq(cs, state->graphics_pipeline->vtx_base_sgpr, state->graphics_pipeline->vtx_emit_num);
|
||||
radeon_emit(cs, first_task);
|
||||
radeon_emit(cs, x);
|
||||
radeon_emit(cs, y);
|
||||
radeon_emit(cs, z);
|
||||
|
|
@ -8100,45 +8097,22 @@ radv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
|
||||
ALWAYS_INLINE static void
|
||||
radv_emit_userdata_mesh_first_task_0_draw_id_0(struct radv_cmd_buffer *cmd_buffer)
|
||||
radv_emit_userdata_mesh_draw_id_0(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct radv_cmd_state *state = &cmd_buffer->state;
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->cs;
|
||||
struct radv_graphics_pipeline *pipeline = state->graphics_pipeline;
|
||||
const bool uses_drawid = pipeline->uses_drawid;
|
||||
|
||||
radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr, 1);
|
||||
radeon_emit(cs, 0);
|
||||
|
||||
if (uses_drawid) {
|
||||
radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr + (pipeline->vtx_emit_num - 1) * 4, 1);
|
||||
radeon_emit(cs, 0);
|
||||
}
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static void
|
||||
radv_emit_userdata_task_ib_only(struct radv_cmd_buffer *cmd_buffer, uint64_t ib_va,
|
||||
uint32_t ib_stride)
|
||||
{
|
||||
struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
|
||||
|
||||
const struct radv_userdata_info *task_ib_loc = radv_get_user_sgpr(task_shader, AC_UD_CS_TASK_IB);
|
||||
|
||||
if (task_ib_loc->sgpr_idx != -1) {
|
||||
assert(task_ib_loc->num_sgprs == 3);
|
||||
unsigned task_ib_reg = R_00B900_COMPUTE_USER_DATA_0 + task_ib_loc->sgpr_idx * 4;
|
||||
|
||||
radeon_set_sh_reg_seq(cs, task_ib_reg, 3);
|
||||
radeon_emit(cs, ib_va);
|
||||
radeon_emit(cs, ib_va >> 32);
|
||||
radeon_emit(cs, ib_stride);
|
||||
}
|
||||
}
|
||||
|
||||
ALWAYS_INLINE static void
|
||||
radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z,
|
||||
uint32_t draw_id, uint32_t first_task, uint64_t ib_va)
|
||||
uint32_t draw_id)
|
||||
{
|
||||
struct radv_shader *task_shader = cmd_buffer->state.shaders[MESA_SHADER_TASK];
|
||||
struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
|
||||
|
|
@ -8164,8 +8138,6 @@ radv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t
|
|||
radeon_set_sh_reg_seq(cs, draw_id_reg, 1);
|
||||
radeon_emit(cs, draw_id);
|
||||
}
|
||||
|
||||
radv_emit_userdata_task_ib_only(cmd_buffer, ib_va, first_task ? 8 : 0);
|
||||
}
|
||||
|
||||
/* Bind an internal index buffer for GPUs that hang with 0-sized index buffers to handle robustness2
|
||||
|
|
@ -8361,13 +8333,12 @@ radv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct r
|
|||
|
||||
ALWAYS_INLINE static void
|
||||
radv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer,
|
||||
uint32_t x, uint32_t y, uint32_t z,
|
||||
uint32_t first_task)
|
||||
uint32_t x, uint32_t y, uint32_t z)
|
||||
{
|
||||
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
|
||||
const uint32_t count = x * y * z;
|
||||
|
||||
radv_emit_userdata_mesh(cmd_buffer, x, y, z, first_task);
|
||||
radv_emit_userdata_mesh(cmd_buffer, x, y, z);
|
||||
|
||||
if (!view_mask) {
|
||||
radv_cs_emit_draw_packet(cmd_buffer, count, 0);
|
||||
|
|
@ -8404,7 +8375,7 @@ radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
|
|||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
|
||||
radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
|
||||
radv_emit_userdata_mesh_draw_id_0(cmd_buffer);
|
||||
|
||||
if (!state->render.view_mask) {
|
||||
radv_cs_emit_indirect_mesh_draw_packet(cmd_buffer, info->count, count_va, info->stride);
|
||||
|
|
@ -8418,23 +8389,14 @@ radv_emit_indirect_mesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
|
|||
|
||||
ALWAYS_INLINE static void
|
||||
radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
|
||||
uint32_t z, uint32_t first_task)
|
||||
uint32_t z)
|
||||
{
|
||||
uint64_t fake_ib_va = 0;
|
||||
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
|
||||
const unsigned num_views = MAX2(1, util_bitcount(view_mask));
|
||||
unsigned ace_predication_size = num_views * 6; /* DISPATCH_TASKMESH_DIRECT_ACE size */
|
||||
|
||||
if (first_task) {
|
||||
/* Pass this as the IB to the shader for emulating firstTask in task shaders. */
|
||||
uint32_t fake_ib_dwords[2] = {x, first_task};
|
||||
unsigned fake_ib_offset;
|
||||
radv_cmd_buffer_upload_data(cmd_buffer, 8, fake_ib_dwords, &fake_ib_offset);
|
||||
fake_ib_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + fake_ib_offset;
|
||||
}
|
||||
|
||||
radv_emit_userdata_task(cmd_buffer, x, y, z, 0, first_task, fake_ib_va);
|
||||
radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
|
||||
radv_emit_userdata_task(cmd_buffer, x, y, z, 0);
|
||||
radv_emit_userdata_mesh_draw_id_0(cmd_buffer);
|
||||
radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs,
|
||||
cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
|
||||
ace_predication_size);
|
||||
|
|
@ -8453,8 +8415,7 @@ radv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint3
|
|||
|
||||
static void
|
||||
radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
|
||||
const struct radv_draw_info *info, uint64_t nv_ib_va,
|
||||
uint32_t nv_ib_stride)
|
||||
const struct radv_draw_info *info)
|
||||
{
|
||||
const uint32_t view_mask = cmd_buffer->state.render.view_mask;
|
||||
struct radeon_winsys *ws = cmd_buffer->device->ws;
|
||||
|
|
@ -8510,8 +8471,7 @@ radv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
|
|||
}
|
||||
|
||||
radv_cs_add_buffer(ws, cmd_buffer->ace_internal.cs, info->indirect->bo);
|
||||
radv_emit_userdata_task_ib_only(cmd_buffer, nv_ib_va, nv_ib_stride);
|
||||
radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
|
||||
radv_emit_userdata_mesh_draw_id_0(cmd_buffer);
|
||||
radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs,
|
||||
cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
|
||||
ace_predication_size);
|
||||
|
|
@ -9274,9 +9234,9 @@ radv_CmdDrawMeshTasksEXT(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y,
|
|||
return;
|
||||
|
||||
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
|
||||
radv_emit_direct_taskmesh_draw_packets(cmd_buffer, x, y, z, 0);
|
||||
radv_emit_direct_taskmesh_draw_packets(cmd_buffer, x, y, z);
|
||||
} else {
|
||||
radv_emit_direct_mesh_draw_packet(cmd_buffer, x, y, z, 0);
|
||||
radv_emit_direct_mesh_draw_packet(cmd_buffer, x, y, z);
|
||||
}
|
||||
|
||||
radv_after_draw(cmd_buffer);
|
||||
|
|
@ -9307,7 +9267,7 @@ radv_CmdDrawMeshTasksIndirectEXT(VkCommandBuffer commandBuffer, VkBuffer _buffer
|
|||
return;
|
||||
|
||||
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
|
||||
radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, 0, 0);
|
||||
radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info);
|
||||
} else {
|
||||
radv_emit_indirect_mesh_draw_packets(cmd_buffer, &info);
|
||||
}
|
||||
|
|
@ -9342,7 +9302,7 @@ radv_CmdDrawMeshTasksIndirectCountEXT(VkCommandBuffer commandBuffer, VkBuffer _b
|
|||
return;
|
||||
|
||||
if (radv_cmdbuf_has_stage(cmd_buffer, MESA_SHADER_TASK)) {
|
||||
radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, 0, 0);
|
||||
radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info);
|
||||
} else {
|
||||
radv_emit_indirect_mesh_draw_packets(cmd_buffer, &info);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -273,10 +273,10 @@ lower_abi_instr(nir_builder *b, nir_instr *instr, void *state)
|
|||
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->ac.task_ring_entry);
|
||||
break;
|
||||
case nir_intrinsic_load_task_ib_addr:
|
||||
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_addr);
|
||||
replacement = nir_imm_zero(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
|
||||
break;
|
||||
case nir_intrinsic_load_task_ib_stride:
|
||||
replacement = ac_nir_load_arg(b, &s->args->ac, s->args->task_ib_stride);
|
||||
replacement = nir_imm_zero(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
|
||||
break;
|
||||
case nir_intrinsic_load_lshs_vertex_stride_amd: {
|
||||
unsigned io_num = stage == MESA_SHADER_VERTEX ?
|
||||
|
|
|
|||
|
|
@ -594,39 +594,6 @@ radv_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage *fs_s
|
|||
return progress;
|
||||
}
|
||||
|
||||
/* Emulates NV_mesh_shader first_task using first_vertex. */
|
||||
static bool
|
||||
radv_lower_ms_workgroup_id(nir_shader *nir)
|
||||
{
|
||||
nir_function_impl *impl = nir_shader_get_entrypoint(nir);
|
||||
bool progress = false;
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, impl);
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
|
||||
if (intrin->intrinsic != nir_intrinsic_load_workgroup_id)
|
||||
continue;
|
||||
|
||||
progress = true;
|
||||
b.cursor = nir_after_instr(instr);
|
||||
nir_ssa_def *x = nir_channel(&b, &intrin->dest.ssa, 0);
|
||||
nir_ssa_def *x_full = nir_iadd(&b, x, nir_load_first_vertex(&b));
|
||||
nir_ssa_def *v = nir_vector_insert_imm(&b, &intrin->dest.ssa, x_full, 0);
|
||||
nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, v, v->parent_instr);
|
||||
}
|
||||
}
|
||||
|
||||
nir_metadata preserved =
|
||||
progress ? (nir_metadata_block_index | nir_metadata_dominance) : nir_metadata_all;
|
||||
nir_metadata_preserve(impl, preserved);
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_sincos(const nir_instr *instr, const void *_)
|
||||
{
|
||||
|
|
@ -889,9 +856,6 @@ radv_shader_spirv_to_nir(struct radv_device *device, const struct radv_pipeline_
|
|||
NIR_PASS(_, nir, nir_lower_compute_system_values, &csv_options);
|
||||
|
||||
if (nir->info.stage == MESA_SHADER_MESH) {
|
||||
/* NV_mesh_shader: include first_task (aka. first_vertex) in workgroup ID. */
|
||||
NIR_PASS(_, nir, radv_lower_ms_workgroup_id);
|
||||
|
||||
/* Mesh shaders only have a 1D "vertex index" which we use
|
||||
* as "workgroup index" to emulate the 3D workgroup ID.
|
||||
*/
|
||||
|
|
@ -1204,7 +1168,6 @@ radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *sta
|
|||
device->physical_device->rad_info.gfx_level, false);
|
||||
return true;
|
||||
} else if (nir->info.stage == MESA_SHADER_TASK) {
|
||||
ac_nir_apply_first_task_to_task_shader(nir);
|
||||
ac_nir_lower_task_outputs_to_mem(nir, AC_TASK_PAYLOAD_ENTRY_BYTES,
|
||||
device->physical_device->task_info.num_entries);
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -108,7 +108,7 @@ count_tes_user_sgprs(const struct radv_pipeline_key *key)
|
|||
static uint8_t
|
||||
count_ms_user_sgprs(const struct radv_shader_info *info)
|
||||
{
|
||||
uint8_t count = 1 + 3; /* firstTask + num_work_groups[3] */
|
||||
uint8_t count = 3; /* num_work_groups[3] */
|
||||
|
||||
if (info->vs.needs_draw_id)
|
||||
count++;
|
||||
|
|
@ -415,7 +415,6 @@ declare_tes_input_vgprs(struct radv_shader_args *args)
|
|||
static void
|
||||
declare_ms_input_sgprs(const struct radv_shader_info *info, struct radv_shader_args *args)
|
||||
{
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.base_vertex);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 3, AC_ARG_INT, &args->ac.num_work_groups);
|
||||
if (info->vs.needs_draw_id) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.draw_id);
|
||||
|
|
@ -553,7 +552,7 @@ static void
|
|||
set_ms_input_locs(struct radv_shader_args *args, uint8_t *user_sgpr_idx)
|
||||
{
|
||||
unsigned vs_num =
|
||||
args->ac.base_vertex.used + 3 * args->ac.num_work_groups.used + args->ac.draw_id.used;
|
||||
3 * args->ac.num_work_groups.used + args->ac.draw_id.used;
|
||||
set_loc_shader(args, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, vs_num);
|
||||
|
||||
if (args->ac.task_ring_entry.used)
|
||||
|
|
@ -664,8 +663,6 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_pip
|
|||
|
||||
if (stage == MESA_SHADER_TASK) {
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.task_ring_entry);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_INT, &args->task_ib_addr);
|
||||
ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->task_ib_stride);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 3; i++) {
|
||||
|
|
@ -947,10 +944,6 @@ radv_declare_shader_args(const struct radv_device *device, const struct radv_pip
|
|||
if (args->ac.task_ring_entry.used) {
|
||||
set_loc_shader(args, AC_UD_TASK_RING_ENTRY, &user_sgpr_idx, 1);
|
||||
}
|
||||
if (args->task_ib_addr.used) {
|
||||
assert(args->task_ib_stride.used);
|
||||
set_loc_shader(args, AC_UD_CS_TASK_IB, &user_sgpr_idx, 3);
|
||||
}
|
||||
break;
|
||||
case MESA_SHADER_VERTEX:
|
||||
if (args->ac.view_index.used)
|
||||
|
|
|
|||
|
|
@ -51,10 +51,6 @@ struct radv_shader_args {
|
|||
struct ac_arg ngg_viewport_scale[2];
|
||||
struct ac_arg ngg_viewport_translate[2];
|
||||
|
||||
/* Task shaders */
|
||||
struct ac_arg task_ib_addr;
|
||||
struct ac_arg task_ib_stride;
|
||||
|
||||
/* Fragment shaders */
|
||||
struct ac_arg ps_epilog_pc;
|
||||
struct ac_arg ps_num_samples;
|
||||
|
|
|
|||
|
|
@ -514,7 +514,6 @@ gather_shader_info_mesh(const nir_shader *nir, struct radv_shader_info *info)
|
|||
* - drawing 1 input vertex ~ launching 1 mesh shader workgroup
|
||||
*
|
||||
* In the shader:
|
||||
* - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
|
||||
* - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
|
||||
*
|
||||
* Notes:
|
||||
|
|
@ -711,10 +710,6 @@ gather_shader_info_task(const nir_shader *nir, struct radv_shader_info *info)
|
|||
* use them.
|
||||
*/
|
||||
|
||||
/* Needed to address the IB to read firstTask in NV_mesh_shader. */
|
||||
info->vs.needs_draw_id |=
|
||||
BITSET_TEST(nir->info.system_values_read, SYSTEM_VALUE_WORKGROUP_ID);
|
||||
|
||||
/* Needed to address the task draw/payload rings. */
|
||||
info->cs.uses_block_id[0] = true;
|
||||
info->cs.uses_block_id[1] = true;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue