From f66f5d1cd55d59b00112c3f1fd32b79f8c52b152 Mon Sep 17 00:00:00 2001 From: Patrick Lerda Date: Fri, 25 Apr 2025 13:26:12 +0200 Subject: [PATCH] r600: implement ARB_shader_draw_parameters This implementation is aimed at compatibility. The new multi draw indirect mode is unrolled at the command stream level. This change was tested on cypress, palm, barts and cayman. It passes all the piglit tests (23/23) and all the khr-gl45 tests (11/11). Signed-off-by: Patrick Lerda Reviewed-by: Gert Wollny Part-of: --- docs/features.txt | 2 +- src/gallium/drivers/r600/r600_pipe.c | 5 +- src/gallium/drivers/r600/r600_pipe.h | 4 +- src/gallium/drivers/r600/r600_shader_common.h | 2 +- src/gallium/drivers/r600/r600_state_common.c | 149 +++++++++++++++--- src/gallium/drivers/r600/sfn/sfn_shader.cpp | 11 ++ src/gallium/drivers/r600/sfn/sfn_shader.h | 3 + .../drivers/r600/sfn/sfn_shader_vs.cpp | 24 ++- src/gallium/drivers/r600/sfn/sfn_shader_vs.h | 1 + 9 files changed, 177 insertions(+), 24 deletions(-) diff --git a/docs/features.txt b/docs/features.txt index 1ec76636069..620ded35ee5 100644 --- a/docs/features.txt +++ b/docs/features.txt @@ -232,7 +232,7 @@ GL 4.6, GLSL 4.60 -- all DONE: radeonsi, virgl, zink, iris, crocus/gen7+, d3d12, GL_ARB_pipeline_statistics_query DONE (freedreno/a6xx+, nvc0, r600, llvmpipe, softpipe, crocus/gen6+) GL_ARB_polygon_offset_clamp DONE (freedreno, nv50, nvc0, r600, llvmpipe, v3d, panfrost, crocus) GL_ARB_shader_atomic_counter_ops DONE (freedreno/a5xx+, nvc0, r600, llvmpipe, softpipe, v3d, panfrost) - GL_ARB_shader_draw_parameters DONE (freedreno/a6xx+, llvmpipe, nvc0, crocus/gen6+) + GL_ARB_shader_draw_parameters DONE (freedreno/a6xx+, llvmpipe, nvc0, crocus/gen6+, r600/evergreen+) GL_ARB_shader_group_vote DONE (freedreno/a6xx, nvc0, llvmpipe, crocus, r600) GL_ARB_spirv_extensions DONE (freedreno, llvmpipe) GL_ARB_texture_filter_anisotropic DONE (etnaviv/HALTI0, freedreno, nv50, nvc0, r600, softpipe, llvmpipe, v3d, panfrost/v6+, crocus) diff --git
a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c index d621bb0f578..fad0e84221b 100644 --- a/src/gallium/drivers/r600/r600_pipe.c +++ b/src/gallium/drivers/r600/r600_pipe.c @@ -477,7 +477,10 @@ static void r600_init_screen_caps(struct r600_screen *rscreen) caps->image_atomic_inc_wrap = family >= CHIP_CEDAR; caps->max_texture_gather_components = family >= CHIP_CEDAR ? 4 : 0; /* kernel command checker support is also required */ - caps->draw_indirect = family >= CHIP_CEDAR; + caps->draw_indirect = + caps->multi_draw_indirect_partial_stride = + caps->multi_draw_indirect = + caps->draw_parameters = family >= CHIP_CEDAR; /* the multi-draw and draw-parameters caps share the draw_indirect family check */ caps->buffer_sampler_view_rgba_only = family < CHIP_CEDAR; diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 6923212afca..764e39f423e 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -477,7 +477,9 @@ struct r600_lds_constant_buffer { /* Processed by the vertex shader */ uint32_t vertexid_base; - uint32_t pad[3]; + uint32_t instance_base; /* returned for nir_intrinsic_load_base_instance */ + uint32_t vertex_base; /* returned for nir_intrinsic_load_base_vertex */ + uint32_t draw_id; /* returned for nir_intrinsic_load_draw_id; these three words take the place of the former pad[3] */ }; struct r600_context { diff --git a/src/gallium/drivers/r600/r600_shader_common.h b/src/gallium/drivers/r600/r600_shader_common.h index 01c66b8c6d3..c4f2b427d14 100644 --- a/src/gallium/drivers/r600/r600_shader_common.h +++ b/src/gallium/drivers/r600/r600_shader_common.h @@ -116,7 +116,7 @@ struct r600_shader { uint8_t rat_base; uint8_t image_size_const_offset; bool disable_sb; - bool vs_vertexid; + bool vs_draw_parameters_enabled; /* set by VertexShader::do_get_shader_info() when the VS uses the vertex id or a draw parameter */ }; union r600_shader_key { diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index af681977170..00f16e86dbb 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -2126,6 +2126,94 @@ static inline void r600_emit_rasterizer_prim_state(struct r600_context *rctx) rctx->last_rast_prim = rast_prim; } +#define
R600_DRAW_PARAMETERS_DRAW_INDIRECT_CS 3 /* value r600_draw_indirect() asserts radeon_check_cs() returns after its three radeon_emit() calls */ +#define R600_DRAW_PARAMETERS_ENABLED_CS 12 /* per-draw bound asserted after the dirty atoms are re-emitted in the r600_draw_vbo() multi-draw loop */ +/* Update the draw-parameter words of rctx->lds_constant_buffer for one draw when the bound VS needs them, grow *cs_space on the first call (is_mapped == false) of an indirect draw, run evergreen_setup_tess_constants(), and return indirect->draw_count (or 1 when indirect is NULL). */ +static inline unsigned +r600_draw_parameters(struct r600_context *rctx, + const struct pipe_draw_info *info, + const struct pipe_draw_indirect_info *indirect, + const struct pipe_draw_start_count_bias *draws, + const unsigned draw_id, + const unsigned multi_draw_offset, + const bool is_mapped, + const uint8_t **indirect_ptr, + unsigned *num_patches, + unsigned *cs_space) +{ + const bool draw_parameters_enabled = + rctx->vs_shader->current->shader.vs_draw_parameters_enabled; + + if (unlikely(draw_parameters_enabled)) { + if (indirect) { + const uint32_t indirect_offset = + indirect->offset + (info->index_size ? + 3 * sizeof(uint32_t) : + 2 * sizeof(uint32_t)); /* NOTE(review): presumably the baseVertex (indexed) / first (non-indexed) word of the GL indirect command -- confirm */ + const uint32_t *indirect_data; + + if (!is_mapped) { /* first call: map the buffer once; later calls reuse *indirect_ptr */ + *indirect_ptr = + r600_buffer_map_sync_with_rings(&rctx->b, + (struct r600_resource *)indirect->buffer, + PIPE_MAP_READ); /* NOTE(review): the returned pointer is dereferenced below without a NULL check */ + *cs_space += R600_DRAW_PARAMETERS_ENABLED_CS * indirect->draw_count; + } + + indirect_data = (uint32_t *)(*indirect_ptr + + indirect_offset + + multi_draw_offset); + + rctx->lds_constant_buffer.vertexid_base = indirect_data[0]; + rctx->lds_constant_buffer.vertex_base = info->index_size ? + indirect_data[0] : + 0; + rctx->lds_constant_buffer.instance_base = indirect_data[1]; + rctx->lds_constant_buffer.draw_id = draw_id; + } else { + rctx->lds_constant_buffer.vertexid_base = 0; + rctx->lds_constant_buffer.vertex_base = info->index_size ? + draws->index_bias : + 0; + rctx->lds_constant_buffer.instance_base = info->start_instance; + rctx->lds_constant_buffer.draw_id = draw_id; + } + } + + if (unlikely(!is_mapped && indirect)) { + *cs_space += R600_DRAW_PARAMETERS_DRAW_INDIRECT_CS * indirect->draw_count; + } + + evergreen_setup_tess_constants(rctx, info, num_patches, draw_parameters_enabled); + + return unlikely(indirect) ?
+ indirect->draw_count : + 1; +} +/* Emit one indirect draw packet (the indexed variant when index_size != 0) whose arguments are read at indirect->offset + multi_draw_offset. */ +static inline void +r600_draw_indirect(struct r600_context *rctx, + struct radeon_cmdbuf *cs, + const struct pipe_draw_indirect_info *indirect, + const unsigned index_size, + const bool render_cond_bit, + const unsigned multi_draw_offset) +{ + assert(rctx->b.gfx_level >= EVERGREEN); + + if (index_size) { + radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDEX_INDIRECT, 1, render_cond_bit)); + radeon_emit(cs, indirect->offset + multi_draw_offset); + radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA); + } else { + radeon_emit(cs, PKT3(EG_PKT3_DRAW_INDIRECT, 1, render_cond_bit)); + radeon_emit(cs, indirect->offset + multi_draw_offset); + radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX); + } + + assert(radeon_check_cs(rctx, cs) == R600_DRAW_PARAMETERS_DRAW_INDIRECT_CS); +} + static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info, unsigned drawid_offset, const struct pipe_draw_indirect_info *indirect, @@ -2150,6 +2238,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info unsigned global_atomic_count = 0; struct pipe_stream_output_target *count_from_so = NULL; unsigned cs_space = 0; + const uint8_t *indirect_ptr = NULL; /* filled in by r600_draw_parameters() when it maps the indirect buffer */ + unsigned multi_draw_loop = 1; /* number of draws to issue; only updated from r600_draw_parameters() when gfx_level >= EVERGREEN */ + unsigned multi_draw_offset = 0; /* byte offset of the current draw's arguments relative to indirect->offset */ if (indirect && indirect->count_from_stream_output) { count_from_so = indirect->count_from_stream_output; @@ -2308,25 +2399,16 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info } if (rctx->b.gfx_level >= EVERGREEN) { - const bool vertexid = rctx->vs_shader->current->shader.vs_vertexid; - - if (unlikely(indirect && vertexid)) { - const uint32_t indirect_offset = - indirect->offset + (info->index_size ?
- 3 * sizeof(uint32_t) : - 2 * sizeof(uint32_t)); - uint8_t *indirect_data = - r600_buffer_map_sync_with_rings(&rctx->b, - (struct r600_resource *)indirect->buffer, - PIPE_MAP_READ); - - rctx->lds_constant_buffer.vertexid_base = - *(uint32_t *)(indirect_data + indirect_offset); - } else { - rctx->lds_constant_buffer.vertexid_base = 0; - } - - evergreen_setup_tess_constants(rctx, info, &num_patches, vertexid); + multi_draw_loop = r600_draw_parameters(rctx, + info, + indirect, + draws, + drawid_offset, + multi_draw_offset, + false, + &indirect_ptr, + &num_patches, + &cs_space); } /* Emit states. */ @@ -2542,6 +2624,35 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT)); } + for (; multi_draw_loop > 1; --multi_draw_loop) { /* runs draw_count - 1 times; presumably the first draw was already emitted earlier in this function -- confirm */ + multi_draw_offset += indirect->stride; + r600_draw_parameters(rctx, + info, + indirect, + draws, + ++drawid_offset, + multi_draw_offset, + true, + &indirect_ptr, + &num_patches, + &cs_space); + + assert(radeon_check_cs(rctx, cs) || true); /* NOTE(review): always true; presumably called only for a side effect of radeon_check_cs() before the checks below -- confirm */ + + mask = rctx->dirty_atoms; + while (mask != 0) { + r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]); + } + + assert(radeon_check_cs(rctx, cs) <= R600_DRAW_PARAMETERS_ENABLED_CS); + + r600_draw_indirect(rctx, + cs, + indirect, + index_size, + render_cond_bit, + multi_draw_offset); + } if (rctx->b.gfx_level >= EVERGREEN) evergreen_emit_atomic_buffer_save(rctx, false, combined_atomics, global_atomic_count); diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.cpp b/src/gallium/drivers/r600/sfn/sfn_shader.cpp index 1eafe4fbed8..16246d5c9b9 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader.cpp @@ -909,6 +909,17 @@ Shader::process_intrinsic(nir_intrinsic_instr *intr) return emit_get_lds_info_uint(intr, offsetof(struct r600_lds_constant_buffer, vertexid_base)); + case nir_intrinsic_load_base_vertex: + return emit_get_lds_info_uint(intr, + offsetof(struct
r600_lds_constant_buffer, + vertex_base)); + case nir_intrinsic_load_base_instance: + return emit_get_lds_info_uint(intr, + offsetof(struct r600_lds_constant_buffer, + instance_base)); + case nir_intrinsic_load_draw_id: + return emit_get_lds_info_uint(intr, + offsetof(struct r600_lds_constant_buffer, draw_id)); case nir_intrinsic_barrier: return emit_barrier(intr); case nir_intrinsic_shared_atomic: diff --git a/src/gallium/drivers/r600/sfn/sfn_shader.h b/src/gallium/drivers/r600/sfn/sfn_shader.h index 0b0fd221d04..d4502da0d88 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader.h @@ -265,6 +265,9 @@ protected: es_tess_coord, es_primitive_id, es_helper_invocation, + es_base_instance, + es_base_vertex, + es_draw_id, es_last }; diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp index f4888f50599..42fe846fea4 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.cpp @@ -168,7 +168,8 @@ void VertexShader::do_get_shader_info(r600_shader *sh_info) { sh_info->processor_type = PIPE_SHADER_VERTEX; - sh_info->vs_vertexid = m_vertex_id != nullptr; + sh_info->vs_draw_parameters_enabled = + m_vertex_id != nullptr || m_draw_parameters_enabled; /* a used vertex id still sets the flag, as vs_vertexid did */ m_export_stage->get_shader_info(sh_info); } @@ -454,6 +455,15 @@ VertexShader::do_scan_instruction(nir_instr *instr) case nir_intrinsic_load_tcs_rel_patch_id_r600: m_sv_values.set(es_rel_patch_id); break; + case nir_intrinsic_load_base_instance: + m_sv_values.set(es_base_instance); + break; + case nir_intrinsic_load_base_vertex: + m_sv_values.set(es_base_vertex); + break; + case nir_intrinsic_load_draw_id: + m_sv_values.set(es_draw_id); + break; default: return false; } @@ -507,6 +517,18 @@ VertexShader::do_allocate_reserved_registers() m_rel_vertex_id = value_factory().allocate_pinned_register(0, 1); } + if (m_sv_values.test(es_base_instance)) { + m_draw_parameters_enabled = true;
+ } + + if (m_sv_values.test(es_base_vertex)) { + m_draw_parameters_enabled = true; + } + + if (m_sv_values.test(es_draw_id)) { + m_draw_parameters_enabled = true; + } + return m_last_vertex_attribute_register + 1; } diff --git a/src/gallium/drivers/r600/sfn/sfn_shader_vs.h b/src/gallium/drivers/r600/sfn/sfn_shader_vs.h index 3aff18a6dac..38383a11a22 100644 --- a/src/gallium/drivers/r600/sfn/sfn_shader_vs.h +++ b/src/gallium/drivers/r600/sfn/sfn_shader_vs.h @@ -161,6 +161,7 @@ private: PRegister m_instance_id{nullptr}; PRegister m_rel_vertex_id{nullptr}; bool m_vs_as_gs_a; + bool m_draw_parameters_enabled{false}; /* set in do_allocate_reserved_registers() when base instance, base vertex or draw id was scanned */ }; } // namespace r600