v3dv: implement VK_EXT_shader_demote_to_helper_invocation
Demoting means that we don't execute any writes to memory but otherwise the invocation continues to execute. In particular, subgroup operations and derivatives must work.

Our implementation of discard does exactly this by using setmsf to prevent writes for the affected invocations. The only difference for us is that with discard/terminate we want to be more careful about emitting quad loads for TMU operations, since those invocations are not supposed to be running any more and their load offsets may not be valid. With demote, the invocations are not terminated, so we should emit memory reads for them to ensure that quad operations and derivatives from invocations that have not been demoted still work.

Since we use the sample mask to implement demotes, we can't tell whether a particular helper invocation was originally such (gl_HelperInvocation in GLSL) or was demoted later (OpIsHelperInvocationEXT, added with SPV_EXT_demote_to_helper_invocation), so we use nir_lower_is_helper_invocation to take care of this.

Reviewed-by: Alejandro Piñeiro <apinheiro@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26949>
parent 658ce711d5
commit 5c42d6c62f
5 changed files with 27 additions and 5 deletions
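The write-mask semantics the commit message describes can be modeled in a few lines of standalone C. This is a minimal sketch, not driver code: the quad, lane, and function names are all invented for illustration. It shows why a demoted lane must keep loading values: a neighbor's derivative reads across lanes, while the cleared write mask (the setmsf-style part) keeps the demoted lane's own stores from reaching memory.

#include <stdbool.h>
#include <stdio.h>

struct quad {
   float value[4];      /* per-lane register contents */
   unsigned write_mask; /* one bit per lane; cleared bit == demoted */
};

/* Demote: stop the lane's memory writes (clear its mask bit), like setmsf. */
static void demote(struct quad *q, int lane)
{
   q->write_mask &= ~(1u << lane);
}

/* A dFdx-style derivative: reads its horizontal neighbor regardless of the
 * write mask, which is exactly why demoted lanes must keep loading values. */
static float dfdx(const struct quad *q, int lane)
{
   int neighbor = lane ^ 1; /* lanes 0<->1 and 2<->3 are horizontal pairs */
   return q->value[neighbor] - q->value[lane];
}

/* Store: only lanes still present in the write mask reach memory. */
static void store(const struct quad *q, int lane, float *mem)
{
   if (q->write_mask & (1u << lane))
      mem[lane] = q->value[lane];
}

int main(void)
{
   struct quad q = { .value = { 1.0f, 4.0f, 2.0f, 8.0f }, .write_mask = 0xf };
   float mem[4] = { 0 };

   demote(&q, 1); /* lane 1 no longer writes... */

   for (int lane = 0; lane < 4; lane++) {
      printf("lane %d: dFdx = %f\n", lane, dfdx(&q, lane));
      store(&q, lane, mem); /* ...but lane 0's derivative still read it */
   }
   printf("mem[1] = %f (untouched by the demoted lane)\n", mem[1]);
   return 0;
}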
@@ -492,7 +492,7 @@ Vulkan 1.3 -- all DONE: anv, radv, tu, lvp, vn
   VK_EXT_pipeline_creation_feedback                     DONE (anv, hasvk, lvp, radv, tu, v3dv, vn)
   VK_EXT_private_data                                   DONE (anv, hasvk, lvp, nvk, pvr, radv, tu, v3dv, vn)
   VK_EXT_image_robustness                               DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
-  VK_EXT_shader_demote_to_helper_invocation             DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
+  VK_EXT_shader_demote_to_helper_invocation             DONE (anv, hasvk, lvp, nvk, radv, tu, v3dv, vn)
   VK_EXT_subgroup_size_control                          DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
   VK_EXT_texel_buffer_alignment                         DONE (anv, hasvk, lvp, nvk, pvr, radv, tu, v3dv, vn)
   VK_EXT_texture_compression_astc_hdr                   DONE (vn)
@@ -649,7 +649,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                     V3D_TMU_OP_TYPE_ATOMIC;
 
          /* Only load per-quad if we can be certain that all
-          * lines in the quad are active.
+          * lines in the quad are active. Notice that demoted
+          * invocations, unlike terminated ones, are still
+          * active: we want to skip memory writes for them but
+          * loads should still work.
           */
          uint32_t perquad =
             is_load && !vir_in_nonuniform_control_flow(c) &&
@@ -1908,6 +1911,7 @@ emit_frag_end(struct v3d_compile *c)
    if (c->output_position_index == -1 &&
        !(c->s->info.num_images || c->s->info.num_ssbos) &&
        !c->s->info.fs.uses_discard &&
+       !c->s->info.fs.uses_demote &&
        !c->fs_key->sample_alpha_to_coverage &&
        c->output_sample_mask_index == -1 &&
        has_any_tlb_color_write) {
@@ -3426,8 +3430,19 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
       ntq_emit_image_size(c, instr);
       break;
 
+   /* FIXME: the Vulkan and SPIR-V specs specify that OpTerminate (which
+    * is intended to match the semantics of GLSL's discard) should
+    * terminate the invocation immediately. Our implementation doesn't
+    * do that. What we do is actually a demote by removing the invocations
+    * from the sample mask. Maybe we could be more strict and force an
+    * early termination by emitting a (maybe conditional) jump to the
+    * end section of the fragment shader for affected invocations.
+    */
    case nir_intrinsic_discard:
    case nir_intrinsic_terminate:
+      c->emitted_discard = true;
+      FALLTHROUGH;
+   case nir_intrinsic_demote:
       ntq_flush_tmu(c);
 
       if (vir_in_nonuniform_control_flow(c)) {
@@ -3440,11 +3455,13 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
          vir_SETMSF_dest(c, vir_nop_reg(),
                          vir_uniform_ui(c, 0));
       }
-      c->emitted_discard = true;
       break;
 
    case nir_intrinsic_discard_if:
-   case nir_intrinsic_terminate_if: {
+   case nir_intrinsic_terminate_if:
+      c->emitted_discard = true;
+      FALLTHROUGH;
+   case nir_intrinsic_demote_if: {
       ntq_flush_tmu(c);
 
       enum v3d_qpu_cond cond = ntq_emit_bool_to_cond(c, instr->src[0]);
@@ -3462,7 +3479,6 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 
       vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(),
                                    vir_uniform_ui(c, 0)), cond);
-      c->emitted_discard = true;
       break;
    }
 
@@ -730,6 +730,7 @@ v3d_lower_nir(struct v3d_compile *c)
                nir_var_function_temp,
                0,
                glsl_get_natural_size_align_bytes);
+   NIR_PASS(_, c->s, nir_lower_is_helper_invocation);
    NIR_PASS(_, c->s, v3d_nir_lower_scratch);
    NIR_PASS(_, c->s, v3d_nir_lower_null_pointers);
 }
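A conceptual sketch of what this pass buys us, based on the behavior the commit message describes: is-helper state is tracked in a temporary that is seeded from gl_HelperInvocation at shader entry and forced to true at each demote, so OpIsHelperInvocationEXT stays correct even though the demote itself only touches the sample mask. All names below are illustrative C stand-ins, not the pass's real implementation.

#include <stdbool.h>

static bool is_helper; /* temporary the pass introduces at shader entry */

static void shader_entry(bool gl_HelperInvocation)
{
   is_helper = gl_HelperInvocation; /* seeded from the system value */
}

static void lowered_demote(void)
{
   is_helper = true; /* recorded before the sample-mask write happens */
   /* ...setmsf-style masking of the invocation's writes goes here... */
}

static bool lowered_is_helper_invocation(void)
{
   return is_helper; /* reads the temporary, not the sample mask */
}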
@@ -205,6 +205,7 @@ get_device_extensions(const struct v3dv_physical_device *device,
       .EXT_private_data = true,
       .EXT_provoking_vertex = true,
       .EXT_separate_stencil_usage = true,
+      .EXT_shader_demote_to_helper_invocation = true,
       .EXT_shader_module_identifier = true,
       .EXT_texel_buffer_alignment = true,
       .EXT_tooling_info = true,
@@ -444,6 +445,9 @@ get_features(const struct v3dv_physical_device *physical_device,
 
       /* VK_KHR_shader_terminate_invocation */
       .shaderTerminateInvocation = true,
+
+      /* VK_EXT_shader_demote_to_helper_invocation */
+      .shaderDemoteToHelperInvocation = true,
    };
 }
 
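With the feature bit exposed above, an application can detect support through the standard feature-chain query. A hedged example using only core Vulkan 1.1+ API; phys_dev is assumed to be a valid VkPhysicalDevice obtained elsewhere:

#include <stdbool.h>
#include <vulkan/vulkan.h>

bool supports_demote(VkPhysicalDevice phys_dev)
{
   VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &demote_features, /* chained so the driver fills it in */
   };
   vkGetPhysicalDeviceFeatures2(phys_dev, &features2);
   return demote_features.shaderDemoteToHelperInvocation == VK_TRUE;
}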
@@ -174,6 +174,7 @@ static const struct spirv_to_nir_options default_spirv_options = {
       .physical_storage_buffer_address = true,
       .workgroup_memory_explicit_layout = true,
       .image_read_without_format = true,
+      .demote_to_helper_invocation = true,
    },
    .ubo_addr_format = nir_address_format_32bit_index_offset,
    .ssbo_addr_format = nir_address_format_32bit_index_offset,