r600: implement tes and tcs instanced gl_PrimitiveID support

This change extends r600_lds_constant_buffer to
implement a fully conformant gl_PrimitiveID at
the tes and tcs stages.

This change was tested on cayman and barts. The following tests are fixed (result before, result after):
spec/arb_tessellation_shader/execution/tcs-primitiveid-instanced: fail pass
spec/arb_tessellation_shader/execution/tes-no-tcs-primitiveid-instanced: fail pass
spec/arb_tessellation_shader/execution/tes-primitiveid-instanced: fail pass
khr-gl4[4-6]/tessellation_shader/tessellation_shader_tessellation/gl_invocationid_patchverticesin_primitiveid: fail pass
khr-gles31/core/tessellation_shader/tessellation_shader_tessellation/gl_invocationid_patchverticesin_primitiveid: fail pass
khr-glesext/tessellation_shader/tessellation_shader_tessellation/gl_invocationid_patchverticesin_primitiveid: fail pass

Signed-off-by: Patrick Lerda <patrick9876@free.fr>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/40297>
This commit is contained in:
Patrick Lerda 2026-03-09 13:07:54 +01:00 committed by Marge Bot
parent 48902771ad
commit 9815901f86
9 changed files with 102 additions and 14 deletions

View file

@ -1867,6 +1867,10 @@ intrinsic("load_local_shared_r600", src_comp=[0], dest_comp=0, indices = [], fla
store("local_shared_r600", [1], [WRITE_MASK])
store("tf_r600", [])
# r600 primitive_id: the raw hardware value, and a vec2 holding (modulo, inverse) used to reduce it
system_value("primitive_id_raw_r600", 1)
system_value("primitive_id_modulo_r600", 2)
# these two definitions are aimed at r600 indirect per_vertex_input accesses
intrinsic("r600_indirect_vertex_at_index", dest_comp=1, src_comp=[1], flags=[CAN_ELIMINATE, CAN_REORDER])
load("r600_per_vertex_input", [1], [BASE, RANGE, COMPONENT, DEST_TYPE, IO_SEMANTICS], [CAN_ELIMINATE, CAN_REORDER])

View file

@ -27,7 +27,6 @@ KHR-GLES31.core.shader_storage_buffer_object.advanced-readWrite-case1-cs,Fail
KHR-GLES31.core.tessellation_shader.single.max_patch_vertices,Crash
KHR-GLES31.core.tessellation_shader.tessellation_shader_tessellation.TCS_TES,Fail
KHR-GLES31.core.tessellation_shader.tessellation_shader_tessellation.gl_InvocationID_PatchVerticesIn_PrimitiveID,Fail
KHR-GLES31.core.tessellation_shader.tessellation_shader_tessellation.max_in_out_attributes,Fail
KHR-GLES31.core.texture_border_clamp.Texture2DArrayR32I,Fail
@ -784,9 +783,6 @@ spec@arb_tessellation_shader@arb_tessellation_shader-tes-gs-max-output -small -s
spec@arb_tessellation_shader@execution@fs-primitiveid-instanced,Fail
spec@arb_tessellation_shader@execution@gs-primitiveid-instanced,Fail
spec@arb_tessellation_shader@execution@tcs-primitiveid-instanced,Fail
spec@arb_tessellation_shader@execution@tes-no-tcs-primitiveid-instanced,Fail
spec@arb_tessellation_shader@execution@tes-primitiveid-instanced,Fail
spec@arb_tessellation_shader@execution@trivial-tess-gs_no-gs-inputs,Fail

View file

@ -4970,7 +4970,8 @@ void evergreen_init_state_functions(struct r600_context *rctx)
void evergreen_setup_tess_constants(struct r600_context *rctx,
const struct pipe_draw_info *info,
unsigned *num_patches,
const bool vertexid)
const bool vertexid,
const uint32_t primitiveid_modulo)
{
struct r600_pipe_shader_selector *tcs = rctx->tcs_shader ? rctx->tcs_shader : rctx->tes_shader;
struct r600_pipe_shader_selector *ls = rctx->vs_shader;
@ -5049,6 +5050,12 @@ void evergreen_setup_tess_constants(struct r600_context *rctx,
rctx->lds_constant_buffer.output_patch0_offset = output_patch0_offset;
rctx->lds_constant_buffer.perpatch_output_offset = perpatch_output_offset;
rctx->lds_constant_buffer.primitiveid_modulo = primitiveid_modulo;
rctx->lds_constant_buffer.primitiveid_inverse =
primitiveid_modulo == (uint32_t)(~0) ?
0 :
(((uint64_t)1) << 32) / primitiveid_modulo + 1;
/* docs say HS_NUM_WAVES - CEIL((LS_HS_CONFIG.NUM_PATCHES *
LS_HS_CONFIG.HS_NUM_OUTPUT_CP) / (NUM_GOOD_PIPES * 16)) */
num_waves = ceilf((float)(*num_patches * num_tcs_output_cp) / (float)wave_divisor);

View file

@ -487,6 +487,10 @@ struct r600_lds_constant_buffer {
uint32_t instance_base;
uint32_t vertex_base;
uint32_t draw_id;
/* gl_PrimitiveID instanced compatibility */
uint32_t primitiveid_modulo;
uint32_t primitiveid_inverse;
};
struct r600_context {
@ -827,7 +831,8 @@ void evergreen_dma_copy_buffer(struct r600_context *rctx,
void evergreen_setup_tess_constants(struct r600_context *rctx,
const struct pipe_draw_info *info,
unsigned *num_patches,
const bool vertexid);
const bool vertexid,
const uint32_t primitiveid_modulo);
uint32_t evergreen_get_ls_hs_config(struct r600_context *rctx,
const struct pipe_draw_info *info,
unsigned num_patches);

View file

@ -2169,7 +2169,8 @@ r600_draw_parameters(struct r600_context *rctx,
const bool is_mapped,
const uint8_t **indirect_ptr,
unsigned *num_patches,
unsigned *cs_space)
unsigned *cs_space,
const uint32_t primitiveid_modulo)
{
const bool draw_parameters_enabled =
rctx->vs_shader->current->shader.vs_draw_parameters_enabled;
@ -2214,7 +2215,7 @@ r600_draw_parameters(struct r600_context *rctx,
*cs_space += R600_DRAW_PARAMETERS_DRAW_INDIRECT_CS * indirect->draw_count;
}
evergreen_setup_tess_constants(rctx, info, num_patches, draw_parameters_enabled);
evergreen_setup_tess_constants(rctx, info, num_patches, draw_parameters_enabled, primitiveid_modulo);
return unlikely(indirect) ?
indirect->draw_count :
@ -2728,7 +2729,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
false,
&indirect_ptr,
&num_patches,
&cs_space);
&cs_space,
unlikely(info->instance_count > 1 && rctx->patch_vertices) ?
draws[0].count / rctx->patch_vertices :
~0);
r600_indirect_parameters_init(rctx,
cs,
indirect,
@ -2988,7 +2992,8 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
true,
&indirect_ptr,
&num_patches,
&cs_space);
&cs_space,
~0);
assert(radeon_check_cs(rctx, cs) || true);

View file

@ -737,6 +737,47 @@ r600_finalize_nir_common(nir_shader *nir, enum amd_gfx_level gfx_level)
;
}
/**
 * Intrinsic callback that replaces nir_intrinsic_load_primitive_id with
 * "raw primitive id modulo primitiveid_modulo".
 *
 * The raw id comes from load_primitive_id_raw_r600; the modulo and its
 * 32-bit reciprocal come as a vec2 from load_primitive_id_modulo_r600
 * (channel 0 = modulo, channel 1 = inverse).
 *
 * \param b        builder; its cursor is moved to where the intrinsic was
 * \param intrin   intrinsic currently visited
 * \param cb_data  unused
 * \return true when the intrinsic was rewritten, false when it was left alone
 */
static bool
r600_lower_primitive_id_pass(nir_builder *b,
                             nir_intrinsic_instr *intrin,
                             UNUSED void *cb_data)
{
   if (intrin->intrinsic != nir_intrinsic_load_primitive_id)
      return false;

   /* The vec2 load reads two consecutive dwords starting at
    * primitiveid_modulo, so the inverse has to be stored right behind it.
    * This is a layout property: check it at compile time. */
   static_assert(offsetof(struct r600_lds_constant_buffer, primitiveid_inverse) -
                       offsetof(struct r600_lds_constant_buffer, primitiveid_modulo) ==
                    sizeof(uint32_t),
                 "primitiveid_inverse must immediately follow primitiveid_modulo");

   b->cursor = nir_instr_remove(&intrin->instr);

   nir_def *const primitive_id_raw = nir_load_primitive_id_raw_r600(b);
   nir_def *const primitive_id_vec2 = nir_load_primitive_id_modulo_r600(b);
   nir_def *const primitive_id_modulo = nir_channel(b, primitive_id_vec2, 0);
   nir_def *const primitive_id_inverse = nir_channel(b, primitive_id_vec2, 1);

   /* Division-free stand-in for nir_umod(b, raw, modulo):
    *    quotient  = umul_high(raw, inverse)
    *    remainder = raw - quotient * modulo
    * When the driver stores an inverse of 0 (modulo == ~0, no instancing
    * fix-up), the quotient is 0 and the raw id passes through unchanged.
    *
    * NOTE(review): the inverse is a rounded-up reciprocal, so the quotient
    * is only guaranteed exact while raw * modulo stays below 2^32; confirm
    * that this range covers the supported draws. */
   nir_def *const quotient =
      nir_umul_high(b, primitive_id_raw, primitive_id_inverse);
   nir_def *const remainder =
      nir_isub(b, primitive_id_raw, nir_imul(b, quotient, primitive_id_modulo));

   /* A modulo of 1 cannot go through the reciprocal: 2^32 / 1 + 1 does not
    * fit the 32-bit primitiveid_inverse field, so the quotient would come
    * out as 0 and the raw id would leak through. The remainder of a
    * division by 1 is always 0, so select it explicitly. */
   nir_def *const primitive_id =
      nir_bcsel(b,
                nir_ieq_imm(b, primitive_id_modulo, 1),
                nir_imm_int(b, 0),
                remainder);

   nir_def_rewrite_uses(&intrin->def, primitive_id);

   return true;
}
/**
 * Applies r600_lower_primitive_id_pass to every intrinsic of the shader.
 *
 * \param nir  shader to process
 * \return the progress flag reported by nir_shader_intrinsics_pass
 */
static bool
r600_lower_primitive_id(nir_shader *nir)
{
   /* No callback data is needed, hence the trailing nullptr. */
   const bool progress = nir_shader_intrinsics_pass(nir,
                                                    r600_lower_primitive_id_pass,
                                                    nir_metadata_control_flow,
                                                    nullptr);
   return progress;
}
DEBUG_GET_ONCE_NUM_OPTION(skip_opt_start, "R600_SFN_SKIP_OPT_START", -1);
DEBUG_GET_ONCE_NUM_OPTION(skip_opt_end, "R600_SFN_SKIP_OPT_END", -1);
DEBUG_GET_ONCE_NUM_OPTION(skip_ra_start, "R600_SFN_SKIP_RA_START", -1);
@ -766,6 +807,12 @@ r600_lower_and_optimize_nir(nir_shader *sh,
NIR_PASS(_, sh, nir_remove_dead_variables, nir_var_shader_out, 0);
r600::sort_fsoutput(sh);
}
if (sh->info.stage == MESA_SHADER_TESS_CTRL ||
sh->info.stage == MESA_SHADER_TESS_EVAL) {
NIR_PASS(_, sh, r600_lower_primitive_id);
}
nir_variable_mode io_modes = nir_var_uniform | nir_var_shader_in | nir_var_shader_out;
NIR_PASS(_, sh, nir_opt_combine_stores, nir_var_shader_out);

View file

@ -930,6 +930,10 @@ Shader::process_intrinsic(nir_intrinsic_instr *intr)
case nir_intrinsic_load_draw_id:
return emit_get_lds_info_uint(intr,
offsetof(struct r600_lds_constant_buffer, draw_id));
case nir_intrinsic_load_primitive_id_modulo_r600:
return emit_get_lds_info_uint2(intr,
offsetof(struct r600_lds_constant_buffer,
primitiveid_modulo));
case nir_intrinsic_barrier:
return emit_barrier(intr);
case nir_intrinsic_shared_atomic:
@ -1544,6 +1548,25 @@ Shader::emit_get_lds_info_uint(nir_intrinsic_instr *instr, int offset)
return true;
}
/**
 * Emits a two-component load from R600_LDS_INFO_CONST_BUFFER into the
 * destination of the given intrinsic.
 *
 * \param instr   intrinsic whose def receives the loaded values
 * \param offset  buffer offset of the first value (the visible caller
 *                passes an offsetof() into r600_lds_constant_buffer)
 * \return always true
 */
bool
Shader::emit_get_lds_info_uint2(nir_intrinsic_instr *instr, int offset)
{
   /* The fetch takes its index from a register; load it with zero so
    * that only the constant offset selects the data. */
   auto zero_index = value_factory().temp_register();
   auto clear_index = new AluInstr(op1_mov,
                                   zero_index,
                                   value_factory().zero(),
                                   AluInstr::write);
   emit_instruction(clear_index);

   auto result = value_factory().dest_vec4(instr->def, pin_group);

   /* Swizzle {0, 1, 7, 7}: channels 0 and 1 are fetched; 7 presumably
    * marks the remaining two channels as unused - TODO confirm. */
   emit_instruction(new LoadFromBuffer(result,
                                       {0, 1, 7, 7},
                                       zero_index,
                                       offset,
                                       R600_LDS_INFO_CONST_BUFFER,
                                       nullptr,
                                       fmt_32_32));

   return true;
}
bool
Shader::emit_shader_clock(nir_intrinsic_instr *instr)
{

View file

@ -317,6 +317,7 @@ private:
bool emit_local_load(nir_intrinsic_instr *instr);
bool emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset);
bool emit_get_lds_info_uint(nir_intrinsic_instr *instr, int offset);
bool emit_get_lds_info_uint2(nir_intrinsic_instr *instr, int offset);
bool emit_group_barrier(nir_intrinsic_instr *intr);
bool emit_shader_clock(nir_intrinsic_instr *instr);
bool emit_wait_ack();

View file

@ -30,7 +30,7 @@ TCSShader::do_scan_instruction(nir_instr *instr)
nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
switch (ii->intrinsic) {
case nir_intrinsic_load_primitive_id:
case nir_intrinsic_load_primitive_id_raw_r600:
m_sv_values.set(es_primitive_id);
break;
case nir_intrinsic_load_invocation_id:
@ -80,7 +80,7 @@ TCSShader::process_stage_intrinsic(nir_intrinsic_instr *instr)
return emit_simple_mov(instr->def, 0, m_rel_patch_id);
case nir_intrinsic_load_invocation_id:
return emit_simple_mov(instr->def, 0, m_invocation_id);
case nir_intrinsic_load_primitive_id:
case nir_intrinsic_load_primitive_id_raw_r600:
return emit_simple_mov(instr->def, 0, m_primitive_id);
case nir_intrinsic_load_tcs_tess_factor_base_r600:
return emit_simple_mov(instr->def, 0, m_tess_factor_base);
@ -158,7 +158,7 @@ TESShader::do_scan_instruction(nir_instr *instr)
case nir_intrinsic_load_tess_coord_xy:
m_sv_values.set(es_tess_coord);
break;
case nir_intrinsic_load_primitive_id:
case nir_intrinsic_load_primitive_id_raw_r600:
m_sv_values.set(es_primitive_id);
break;
case nir_intrinsic_load_tcs_rel_patch_id_r600:
@ -208,7 +208,7 @@ TESShader::process_stage_intrinsic(nir_intrinsic_instr *intr)
case nir_intrinsic_load_tess_coord_xy:
return emit_simple_mov(intr->def, 0, m_tess_coord[0], pin_none) &&
emit_simple_mov(intr->def, 1, m_tess_coord[1], pin_none);
case nir_intrinsic_load_primitive_id:
case nir_intrinsic_load_primitive_id_raw_r600:
return emit_simple_mov(intr->def, 0, m_primitive_id);
case nir_intrinsic_load_tcs_rel_patch_id_r600:
return emit_simple_mov(intr->def, 0, m_rel_patch_id);