r600: implement ARB_shader_draw_parameters

This implementation is aimed at compatibility. The
new multi draw indirect mode is unrolled at the
command stream level.

This change was tested on cypress, palm, barts and cayman.
It passes all the piglit tests (23/23) and all the khr-gl45
tests (11/11).

Signed-off-by: Patrick Lerda <patrick9876@free.fr>
Reviewed-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34726>
This commit is contained in:
Patrick Lerda 2025-04-25 13:26:12 +02:00 committed by Marge Bot
parent 285d57d64b
commit f66f5d1cd5
9 changed files with 177 additions and 24 deletions

View file

@ -232,7 +232,7 @@ GL 4.6, GLSL 4.60 -- all DONE: radeonsi, virgl, zink, iris, crocus/gen7+, d3d12,
GL_ARB_pipeline_statistics_query DONE (freedreno/a6xx+, nvc0, r600, llvmpipe, softpipe, crocus/gen6+)
GL_ARB_polygon_offset_clamp DONE (freedreno, nv50, nvc0, r600, llvmpipe, v3d, panfrost, crocus)
GL_ARB_shader_atomic_counter_ops DONE (freedreno/a5xx+, nvc0, r600, llvmpipe, softpipe, v3d, panfrost)
GL_ARB_shader_draw_parameters DONE (freedreno/a6xx+, llvmpipe, nvc0, crocus/gen6+)
GL_ARB_shader_draw_parameters DONE (freedreno/a6xx+, llvmpipe, nvc0, crocus/gen6+, r600/evergreen+)
GL_ARB_shader_group_vote DONE (freedreno/a6xx, nvc0, llvmpipe, crocus, r600)
GL_ARB_spirv_extensions DONE (freedreno, llvmpipe)
GL_ARB_texture_filter_anisotropic DONE (etnaviv/HALTI0, freedreno, nv50, nvc0, r600, softpipe, llvmpipe, v3d, panfrost/v6+, crocus)

View file

@ -477,7 +477,10 @@ static void r600_init_screen_caps(struct r600_screen *rscreen)
caps->image_atomic_inc_wrap = family >= CHIP_CEDAR;
caps->max_texture_gather_components = family >= CHIP_CEDAR ? 4 : 0;
/* kernel command checker support is also required */
caps->draw_indirect = family >= CHIP_CEDAR;
caps->draw_indirect =
caps->multi_draw_indirect_partial_stride =
caps->multi_draw_indirect =
caps->draw_parameters = family >= CHIP_CEDAR;
caps->buffer_sampler_view_rgba_only = family < CHIP_CEDAR;

View file

@ -477,7 +477,9 @@ struct r600_lds_constant_buffer {
/* Processed by the vertex shader */
uint32_t vertexid_base;
uint32_t pad[3];
uint32_t instance_base;
uint32_t vertex_base;
uint32_t draw_id;
};
struct r600_context {

View file

@ -116,7 +116,7 @@ struct r600_shader {
uint8_t rat_base;
uint8_t image_size_const_offset;
bool disable_sb;
bool vs_vertexid;
bool vs_draw_parameters_enabled;
};
union r600_shader_key {

View file

@ -2126,6 +2126,94 @@ static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx)
rctx->last_rast_prim = rast_prim;
}
#define R600_DRAW_PARAMETERS_DRAW_INDIRECT_CS 3
#define R600_DRAW_PARAMETERS_ENABLED_CS 12
/*
 * Refresh the draw-parameter fields of rctx->lds_constant_buffer for one
 * draw and report how many draws the caller has to issue.
 *
 * The constant-buffer fields are only written when the bound vertex shader
 * has vs_draw_parameters_enabled set:
 *  - direct draw:   vertexid_base = 0, vertex_base = draws->index_bias for
 *                   indexed draws (0 otherwise), instance_base =
 *                   info->start_instance;
 *  - indirect draw: two consecutive dwords are read by the CPU from the
 *                   indirect buffer, starting 3 dwords (indexed) or 2 dwords
 *                   (non-indexed) past indirect->offset + multi_draw_offset.
 *                   NOTE(review): with the GL indirect command layouts these
 *                   would be baseVertex/first followed by baseInstance.
 * draw_id is stored as passed in both cases.
 *
 * is_mapped:    false on the first call of a draw; the indirect buffer is
 *               then mapped into *indirect_ptr (only if the shader needs the
 *               parameters) and *cs_space is increased per indirect draw.
 *               true on follow-up calls, which reuse *indirect_ptr and leave
 *               *cs_space alone.
 * num_patches:  forwarded to evergreen_setup_tess_constants().
 *
 * Returns indirect->draw_count for indirect draws, 1 otherwise.
 */
static inline unsigned
r600_draw_parameters(struct r600_context *rctx,
		     const struct pipe_draw_info *info,
		     const struct pipe_draw_indirect_info *indirect,
		     const struct pipe_draw_start_count_bias *draws,
		     const unsigned draw_id,
		     const unsigned multi_draw_offset,
		     const bool is_mapped,
		     const uint8_t **indirect_ptr,
		     unsigned *num_patches,
		     unsigned *cs_space)
{
	const bool draw_parameters_enabled =
		rctx->vs_shader->current->shader.vs_draw_parameters_enabled;
	const bool first_indirect_call = indirect && !is_mapped;

	if (unlikely(draw_parameters_enabled)) {
		uint32_t vertexid_base = 0;
		uint32_t vertex_base;
		uint32_t instance_base;

		if (indirect) {
			/* Dwords of the indirect command that precede the
			 * two values consumed below. */
			const unsigned skipped_dwords = info->index_size ? 3 : 2;
			const uint32_t params_offset =
				indirect->offset + skipped_dwords * sizeof(uint32_t);
			const uint32_t *params;

			if (!is_mapped) {
				/* First call for this draw: map the buffer once;
				 * later calls reuse the pointer. */
				*indirect_ptr =
					r600_buffer_map_sync_with_rings(&rctx->b,
									(struct r600_resource *)indirect->buffer,
									PIPE_MAP_READ);
				*cs_space += R600_DRAW_PARAMETERS_ENABLED_CS * indirect->draw_count;
			}

			params = (const uint32_t *)(*indirect_ptr +
						    params_offset +
						    multi_draw_offset);

			vertexid_base = params[0];
			vertex_base = info->index_size ? params[0] : 0;
			instance_base = params[1];
		} else {
			vertex_base = info->index_size ? draws->index_bias : 0;
			instance_base = info->start_instance;
		}

		rctx->lds_constant_buffer.vertexid_base = vertexid_base;
		rctx->lds_constant_buffer.vertex_base = vertex_base;
		rctx->lds_constant_buffer.instance_base = instance_base;
		rctx->lds_constant_buffer.draw_id = draw_id;
	}

	/* Space for the indirect draw packets themselves, accounted once. */
	if (unlikely(first_indirect_call)) {
		*cs_space += R600_DRAW_PARAMETERS_DRAW_INDIRECT_CS * indirect->draw_count;
	}

	evergreen_setup_tess_constants(rctx, info, num_patches, draw_parameters_enabled);

	return unlikely(indirect) ?
		indirect->draw_count :
		1;
}
/*
 * Emit one indirect draw packet into cs.
 *
 * Three values are emitted: the PKT3 header (EG_PKT3_DRAW_INDEX_INDIRECT for
 * indexed draws, EG_PKT3_DRAW_INDIRECT otherwise), the offset of the draw
 * command (indirect->offset + multi_draw_offset), and the source select
 * (DMA for indexed draws, AUTO_INDEX otherwise).
 *
 * render_cond_bit is passed through as the predicate argument of PKT3().
 * Asserts that the context is Evergreen or newer and that
 * radeon_check_cs() reports R600_DRAW_PARAMETERS_DRAW_INDIRECT_CS.
 */
static inline void
r600_draw_indirect(struct r600_context *rctx,
		   struct radeon_cmdbuf *cs,
		   const struct pipe_draw_indirect_info *indirect,
		   const unsigned index_size,
		   const bool render_cond_bit,
		   const unsigned multi_draw_offset)
{
	const unsigned opcode = index_size ?
		EG_PKT3_DRAW_INDEX_INDIRECT :
		EG_PKT3_DRAW_INDIRECT;
	const unsigned src_sel = index_size ?
		V_0287F0_DI_SRC_SEL_DMA :
		V_0287F0_DI_SRC_SEL_AUTO_INDEX;

	assert(rctx->b.gfx_level >= EVERGREEN);

	radeon_emit(cs, PKT3(opcode, 1, render_cond_bit));
	radeon_emit(cs, indirect->offset + multi_draw_offset);
	radeon_emit(cs, src_sel);

	assert(radeon_check_cs(rctx, cs) == R600_DRAW_PARAMETERS_DRAW_INDIRECT_CS);
}
static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info,
unsigned drawid_offset,
const struct pipe_draw_indirect_info *indirect,
@ -2150,6 +2238,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
unsigned global_atomic_count = 0;
struct pipe_stream_output_target *count_from_so = NULL;
unsigned cs_space = 0;
const uint8_t *indirect_ptr = NULL;
unsigned multi_draw_loop = 1;
unsigned multi_draw_offset = 0;
if (indirect && indirect->count_from_stream_output) {
count_from_so = indirect->count_from_stream_output;
@ -2308,25 +2399,16 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
}
if (rctx->b.gfx_level >= EVERGREEN) {
const bool vertexid = rctx->vs_shader->current->shader.vs_vertexid;
if (unlikely(indirect && vertexid)) {
const uint32_t indirect_offset =
indirect->offset + (info->index_size ?
3 * sizeof(uint32_t) :
2 * sizeof(uint32_t));
uint8_t *indirect_data =
r600_buffer_map_sync_with_rings(&rctx->b,
(struct r600_resource *)indirect->buffer,
PIPE_MAP_READ);
rctx->lds_constant_buffer.vertexid_base =
*(uint32_t *)(indirect_data + indirect_offset);
} else {
rctx->lds_constant_buffer.vertexid_base = 0;
}
evergreen_setup_tess_constants(rctx, info, &num_patches, vertexid);
multi_draw_loop = r600_draw_parameters(rctx,
info,
indirect,
draws,
drawid_offset,
multi_draw_offset,
false,
&indirect_ptr,
&num_patches,
&cs_space);
}
/* Emit states. */
@ -2542,6 +2624,35 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT));
}
for (; multi_draw_loop > 1; --multi_draw_loop) {
multi_draw_offset += indirect->stride;
r600_draw_parameters(rctx,
info,
indirect,
draws,
++drawid_offset,
multi_draw_offset,
true,
&indirect_ptr,
&num_patches,
&cs_space);
assert(radeon_check_cs(rctx, cs) || true);
mask = rctx->dirty_atoms;
while (mask != 0) {
r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
}
assert(radeon_check_cs(rctx, cs) <= R600_DRAW_PARAMETERS_ENABLED_CS);
r600_draw_indirect(rctx,
cs,
indirect,
index_size,
render_cond_bit,
multi_draw_offset);
}
if (rctx->b.gfx_level >= EVERGREEN)
evergreen_emit_atomic_buffer_save(rctx, false, combined_atomics, global_atomic_count);

View file

@ -909,6 +909,17 @@ Shader::process_intrinsic(nir_intrinsic_instr *intr)
return emit_get_lds_info_uint(intr,
offsetof(struct r600_lds_constant_buffer,
vertexid_base));
case nir_intrinsic_load_base_vertex:
return emit_get_lds_info_uint(intr,
offsetof(struct r600_lds_constant_buffer,
vertex_base));
case nir_intrinsic_load_base_instance:
return emit_get_lds_info_uint(intr,
offsetof(struct r600_lds_constant_buffer,
instance_base));
case nir_intrinsic_load_draw_id:
return emit_get_lds_info_uint(intr,
offsetof(struct r600_lds_constant_buffer, draw_id));
case nir_intrinsic_barrier:
return emit_barrier(intr);
case nir_intrinsic_shared_atomic:

View file

@ -265,6 +265,9 @@ protected:
es_tess_coord,
es_primitive_id,
es_helper_invocation,
es_base_instance,
es_base_vertex,
es_draw_id,
es_last
};

View file

@ -168,7 +168,8 @@ void
VertexShader::do_get_shader_info(r600_shader *sh_info)
{
sh_info->processor_type = PIPE_SHADER_VERTEX;
sh_info->vs_vertexid = m_vertex_id != nullptr;
sh_info->vs_draw_parameters_enabled =
m_vertex_id != nullptr || m_draw_parameters_enabled;
m_export_stage->get_shader_info(sh_info);
}
@ -454,6 +455,15 @@ VertexShader::do_scan_instruction(nir_instr *instr)
case nir_intrinsic_load_tcs_rel_patch_id_r600:
m_sv_values.set(es_rel_patch_id);
break;
case nir_intrinsic_load_base_instance:
m_sv_values.set(es_base_instance);
break;
case nir_intrinsic_load_base_vertex:
m_sv_values.set(es_base_vertex);
break;
case nir_intrinsic_load_draw_id:
m_sv_values.set(es_draw_id);
break;
default:
return false;
}
@ -507,6 +517,18 @@ VertexShader::do_allocate_reserved_registers()
m_rel_vertex_id = value_factory().allocate_pinned_register(0, 1);
}
if (m_sv_values.test(es_base_instance)) {
m_draw_parameters_enabled = true;
}
if (m_sv_values.test(es_base_vertex)) {
m_draw_parameters_enabled = true;
}
if (m_sv_values.test(es_draw_id)) {
m_draw_parameters_enabled = true;
}
return m_last_vertex_attribute_register + 1;
}

View file

@ -161,6 +161,7 @@ private:
PRegister m_instance_id{nullptr};
PRegister m_rel_vertex_id{nullptr};
bool m_vs_as_gs_a;
bool m_draw_parameters_enabled{false};
};
} // namespace r600