r600: implement a conformant gl_VertexID with base offset

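As explained by d80701df8a, the r600 hardware doesn't implement
the base vertex offset. This change implements the offset in software:
the base vertex is stored in a new entry of the constant buffer that
already carries the LDS info, and the vertex shader reads it from there.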

This change is inspired by 3511a51be0 ("freedreno/ir3: handle
VTXID_BASE for indirect draws").

Note: this feature requires at least evergreen.

This change was tested on palm and cayman. The following tests are fixed
(each line ends with the previous result followed by the new result):
spec/arb_draw_indirect/gl_vertexid used with gldrawarraysindirect: fail pass
spec/arb_draw_indirect/gl_vertexid used with gldrawelementsindirect: fail pass
spec/arb_multi_draw_indirect/gl-3.0-multidrawarrays-vertexid -indirect: fail pass

Signed-off-by: Patrick Lerda <patrick9876@free.fr>
Reviewed-by: Gert Wollny <gert.wollny@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/32769>
This commit is contained in:
Patrick Lerda 2024-12-20 18:40:29 +01:00 committed by Marge Bot
parent c016f84805
commit 3cfcb10d8b
9 changed files with 73 additions and 10 deletions

View file

@ -765,9 +765,6 @@ spec@arb_compute_shader@local-id-explosion,Fail
spec@arb_depth_buffer_float@fbo-clear-formats stencil,Fail
spec@arb_depth_buffer_float@fbo-clear-formats stencil@GL_DEPTH32F_STENCIL8,Fail
spec@arb_draw_indirect@gl_vertexid used with gldrawarraysindirect,Fail
spec@arb_draw_indirect@gl_vertexid used with gldrawelementsindirect,Fail
# "Testing level 3
# Probe at (0,8)
# Expected: 219
@ -782,7 +779,6 @@ spec@arb_gpu_shader5@execution@built-in-functions@fs-interpolateatsample-array-o
spec@arb_gpu_shader5@execution@built-in-functions@fs-interpolateatsample-block-array,Fail
spec@arb_multi_draw_indirect@arb_draw_elements_base_vertex-multidrawelements -indirect,Fail
spec@arb_multi_draw_indirect@gl-3.0-multidrawarrays-vertexid -indirect,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex,Fail
spec@arb_program_interface_query@arb_program_interface_query-getprogramresourceindex@'vs_input2[1][0]' on GL_PROGRAM_INPUT,Fail

View file

@ -4601,7 +4601,10 @@ void evergreen_init_state_functions(struct r600_context *rctx)
* uint32_t perpatch_output_offset
* and the same constbuf is bound to LS/HS/VS(ES).
*/
void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe_draw_info *info, unsigned *num_patches)
void evergreen_setup_tess_constants(struct r600_context *rctx,
const struct pipe_draw_info *info,
unsigned *num_patches,
const bool vertexid)
{
struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader;
struct r600_pipe_shader_selector *ls = rctx->vs_shader;
@ -4621,8 +4624,16 @@ void evergreen_setup_tess_constants(struct r600_context *rctx, const struct pipe
if (!rctx->tes_shader) {
rctx->lds_alloc = 0;
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
R600_LDS_INFO_CONST_BUFFER, false, NULL);
if (unlikely(vertexid))
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
R600_LDS_INFO_CONST_BUFFER, false,
&rctx->lds_constbuf_pipe);
else
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_VERTEX,
R600_LDS_INFO_CONST_BUFFER, false,
NULL);
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_CTRL,
R600_LDS_INFO_CONST_BUFFER, false, NULL);
rctx->b.b.set_constant_buffer(&rctx->b.b, PIPE_SHADER_TESS_EVAL,

View file

@ -473,6 +473,10 @@ struct r600_lds_constant_buffer {
uint32_t output_vertex_size;
uint32_t output_patch0_offset;
uint32_t perpatch_output_offset;
/* Processed by the vertex shader */
uint32_t vertexid_base;
uint32_t pad[3];
};
struct r600_context {
@ -792,7 +796,8 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
uint64_t size);
void evergreen_setup_tess_constants(struct r600_context *rctx,
const struct pipe_draw_info *info,
unsigned *num_patches);
unsigned *num_patches,
const bool vertexid);
uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
const struct pipe_draw_info *info,
unsigned num_patches);

View file

@ -1214,6 +1214,7 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
.lower_image_offset_to_range_base = 1,
.vectorize_tess_levels = 1,
.io_options = nir_io_mediump_is_32bit,
.vertex_id_zero_based = rscreen->info.gfx_level >= EVERGREEN,
};
rscreen->nir_options = nir_options;

View file

@ -114,6 +114,7 @@ struct r600_shader {
uint8_t rat_base;
uint8_t image_size_const_offset;
bool disable_sb;
bool vs_vertexid;
};
union r600_shader_key {

View file

@ -2289,8 +2289,27 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
}
if (rctx->b.gfx_level >= EVERGREEN)
evergreen_setup_tess_constants(rctx, info, &num_patches);
if (rctx->b.gfx_level >= EVERGREEN) {
const bool vertexid = rctx->vs_shader->current->shader.vs_vertexid;
if (unlikely(indirect && vertexid)) {
const uint32_t indirect_offset =
indirect->offset + (info->index_size ?
3 * sizeof(uint32_t) :
2 * sizeof(uint32_t));
uint8_t *indirect_data =
r600_buffer_map_sync_with_rings(&rctx->b,
(struct r600_resource *)indirect->buffer,
PIPE_MAP_READ);
rctx->lds_constant_buffer.vertexid_base =
*(uint32_t *)(indirect_data + indirect_offset);
} else {
rctx->lds_constant_buffer.vertexid_base = 0;
}
evergreen_setup_tess_constants(rctx, info, &num_patches, vertexid);
}
/* Emit states. */
r600_need_cs_space(rctx, has_user_indices ? 5 : 0, true, util_bitcount(atomic_used_mask));

View file

@ -905,6 +905,10 @@ Shader::process_intrinsic(nir_intrinsic_instr *intr)
return emit_load_tcs_param_base(intr, 0);
case nir_intrinsic_load_tcs_out_param_base_r600:
return emit_load_tcs_param_base(intr, 16);
case nir_intrinsic_load_first_vertex:
return emit_get_lds_info_uint(intr,
offsetof(struct r600_lds_constant_buffer,
vertexid_base));
case nir_intrinsic_barrier:
return emit_barrier(intr);
case nir_intrinsic_shared_atomic:
@ -1493,6 +1497,26 @@ Shader::emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset)
return true;
}
/* Fetch a single 32-bit value from the constant buffer bound at
 * R600_LDS_INFO_CONST_BUFFER and write it to the destination of `instr`.
 *
 * instr:  the NIR intrinsic whose def receives the loaded value.
 * offset: location of the value inside the buffer, forwarded unchanged to
 *         the fetch (the load_first_vertex case passes an offsetof() into
 *         struct r600_lds_constant_buffer).
 *
 * Always returns true.
 */
bool
Shader::emit_get_lds_info_uint(nir_intrinsic_instr *instr, int offset)
{
   /* The fetch takes a register as its address source; give it one that
    * holds zero so that only `offset` selects the entry. */
   auto zero_addr = value_factory().temp_register();
   auto clear_addr =
      new AluInstr(op1_mov, zero_addr, value_factory().zero(), AluInstr::last_write);
   emit_instruction(clear_addr);

   auto result = value_factory().dest_vec4(instr->def, pin_group);

   /* Only the first channel is selected from the fetched data.
    * NOTE(review): 7 presumably marks a channel as unused, and
    * fmt_32_float presumably passes the 32-bit word through unmodified
    * for this uint value -- confirm against LoadFromBuffer. */
   emit_instruction(new LoadFromBuffer(result,
                                       {0, 7, 7, 7},
                                       zero_addr,
                                       offset,
                                       R600_LDS_INFO_CONST_BUFFER,
                                       nullptr,
                                       fmt_32_float));

   return true;
}
bool
Shader::emit_shader_clock(nir_intrinsic_instr *instr)
{

View file

@ -311,6 +311,7 @@ private:
bool emit_local_store(nir_intrinsic_instr *intr);
bool emit_local_load(nir_intrinsic_instr *instr);
bool emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset);
bool emit_get_lds_info_uint(nir_intrinsic_instr *instr, int offset);
bool emit_group_barrier(nir_intrinsic_instr *intr);
bool emit_shader_clock(nir_intrinsic_instr *instr);
bool emit_wait_ack();

View file

@ -168,6 +168,7 @@ void
VertexShader::do_get_shader_info(r600_shader *sh_info)
{
/* Fill the vertex-shader specific fields of sh_info, then hand it to the
 * export stage so it can add its own information. */
sh_info->processor_type = PIPE_SHADER_VERTEX;
/* Set when m_vertex_id is non-null. r600_draw_vbo reads this flag to
 * decide whether the vertexid_base constant has to be supplied.
 * NOTE(review): presumably m_vertex_id is only allocated when the shader
 * reads the vertex id -- confirm where it is assigned. */
sh_info->vs_vertexid = m_vertex_id != nullptr;
m_export_stage->get_shader_info(sh_info);
}
@ -439,8 +440,11 @@ VertexShader::do_scan_instruction(nir_instr *instr)
break;
}
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_vertex_id_zero_base:
m_sv_values.set(es_vertexid);
break;
case nir_intrinsic_load_first_vertex:
break;
case nir_intrinsic_load_instance_id:
m_sv_values.set(es_instanceid);
break;
@ -517,6 +521,7 @@ VertexShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
{
switch (intr->intrinsic) {
case nir_intrinsic_load_vertex_id:
case nir_intrinsic_load_vertex_id_zero_base:
return emit_simple_mov(intr->def, 0, m_vertex_id);
case nir_intrinsic_load_instance_id:
return emit_simple_mov(intr->def, 0, m_instance_id);