mirror of https://gitlab.freedesktop.org/mesa/mesa.git
ir3, turnip: Support VK_*_compute_shader_derivatives on a7xx
Quad derivative groups are supported since a7xx using the tiling mode bit. Linear derivative groups may also work on a6xx, but I haven't tested that yet.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/31130>
commit 022fb8e4c7
parent 624d83bfd1
3 changed files with 58 additions and 4 deletions
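For context (illustrative, not part of this commit): a minimal sketch of how an application would enable VK_KHR_compute_shader_derivatives at device creation once the driver advertises it. The helper name, queue family index and lack of error handling are placeholders; the struct and enum names come from the Vulkan headers.

#include <vulkan/vulkan.h>

/* Sketch only: chain the KHR features struct into VkDeviceCreateInfo so a
 * compute shader can use the DerivativeGroupQuadsKHR/DerivativeGroupLinearKHR
 * execution modes. On turnip this is reported for a7xx+ after this change. */
static VkDevice
create_device_with_derivatives(VkPhysicalDevice physical_device)
{
   VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR derivative_features = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_KHR,
      .computeDerivativeGroupQuads = VK_TRUE,
      .computeDerivativeGroupLinear = VK_TRUE,
   };
   const char *exts[] = { VK_KHR_COMPUTE_SHADER_DERIVATIVES_EXTENSION_NAME };
   float priority = 1.0f;
   VkDeviceQueueCreateInfo queue_info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
      .queueFamilyIndex = 0, /* placeholder */
      .queueCount = 1,
      .pQueuePriorities = &priority,
   };
   VkDeviceCreateInfo device_info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
      .pNext = &derivative_features,
      .queueCreateInfoCount = 1,
      .pQueueCreateInfos = &queue_info,
      .enabledExtensionCount = 1,
      .ppEnabledExtensionNames = exts,
   };
   VkDevice device = VK_NULL_HANDLE;
   vkCreateDevice(physical_device, &device_info, NULL, &device);
   return device;
}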
@@ -510,7 +510,7 @@ Khronos extensions that are not part of any Vulkan version:
   VK_KHR_acceleration_structure                      DONE (anv/gfx12.5+, lvp, radv/gfx10.3+)
   VK_KHR_android_surface                              not started
   VK_KHR_calibrated_timestamps                        DONE (anv, nvk, radv, tu/a750+)
-  VK_KHR_compute_shader_derivatives                   DONE (anv, nvk, radv)
+  VK_KHR_compute_shader_derivatives                   DONE (anv, nvk, radv, tu/a7xx+)
   VK_KHR_cooperative_matrix                           DONE (anv, radv/gfx11+)
   VK_KHR_deferred_host_operations                     DONE (anv, hasvk, lvp, radv)
   VK_KHR_display                                      DONE (anv, nvk, pvr, radv, tu, v3dv)
@@ -602,16 +602,58 @@ lower_subgroup_id(nir_builder *b, nir_instr *instr, void *_shader)
    * LocalInvocationIndex here. This means that whenever we do this lowering we
    * have to force linear dispatch to make sure that the relation between
    * SubgroupId/SubgroupLocalInvocationId and LocalInvocationIndex is what we
-   * expect.
+   * expect, unless the shader forces us to do the quad layout in which case we
+   * have to use the tiled layout.
    */
-   shader->cs.force_linear_dispatch = true;
 
    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic == nir_intrinsic_load_subgroup_id &&
+       shader->nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) {
+      /* We have to manually figure out which subgroup we're in using the
+       * tiling. The tiling is 4x4, unless one of the dimensions is not a
+       * multiple of 4 in which case it drops to 2.
+       */
+      nir_def *local_size = nir_load_workgroup_size(b);
+      nir_def *local_size_x = nir_channel(b, local_size, 0);
+      nir_def *local_size_y = nir_channel(b, local_size, 1);
+      /* Calculate the shift from invocation to tile index for x and y */
+      nir_def *x_shift = nir_bcsel(b,
+                                   nir_ieq_imm(b,
+                                               nir_iand_imm(b, local_size_x, 3),
+                                               0),
+                                   nir_imm_int(b, 2), nir_imm_int(b, 1));
+      nir_def *y_shift = nir_bcsel(b,
+                                   nir_ieq_imm(b,
+                                               nir_iand_imm(b, local_size_y, 3),
+                                               0),
+                                   nir_imm_int(b, 2), nir_imm_int(b, 1));
+      nir_def *id = nir_load_local_invocation_id(b);
+      nir_def *id_x = nir_channel(b, id, 0);
+      nir_def *id_y = nir_channel(b, id, 1);
+      /* Calculate which tile we're in */
+      nir_def *tile_id =
+         nir_iadd(b, nir_imul24(b, nir_ishr(b, id_y, y_shift),
+                                nir_ishr(b, local_size_x, x_shift)),
+                  nir_ishr(b, id_x, x_shift));
+      /* Finally calculate the subgroup id */
+      return nir_ishr(b, tile_id, nir_isub(b,
+                                           nir_load_subgroup_id_shift_ir3(b),
+                                           nir_iadd(b, x_shift, y_shift)));
+   }
+
+   /* Just use getfiberid if we have to use tiling */
+   if (intr->intrinsic == nir_intrinsic_load_subgroup_invocation &&
+       shader->nir->info.derivative_group == DERIVATIVE_GROUP_QUADS) {
+      return NULL;
+   }
+
    if (intr->intrinsic == nir_intrinsic_load_subgroup_invocation) {
+      shader->cs.force_linear_dispatch = true;
       return nir_iand(
          b, nir_load_local_invocation_index(b),
          nir_iadd_imm(b, nir_load_subgroup_size(b), -1));
    } else if (intr->intrinsic == nir_intrinsic_load_subgroup_id) {
+      shader->cs.force_linear_dispatch = true;
       return nir_ishr(b, nir_load_local_invocation_index(b),
                       nir_load_subgroup_id_shift_ir3(b));
    } else {
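To make the tiling math in the hunk above concrete, here is a small host-side sketch (illustrative, not driver code) that mirrors the lowered expression on the CPU, assuming hypothetical 8x8 and 16x16 workgroups with a subgroup size of 64 (so subgroup_id_shift is 6):

#include <stdio.h>

/* Mirrors the lowering: tiles are 4x4, unless a workgroup dimension is not
 * a multiple of 4, in which case that dimension's tile size drops to 2.
 * With a subgroup size of 64 each subgroup covers 64/16 = 4 full tiles. */
static unsigned
quad_tiled_subgroup_id(unsigned id_x, unsigned id_y,
                       unsigned local_size_x, unsigned local_size_y,
                       unsigned subgroup_id_shift)
{
   unsigned x_shift = (local_size_x & 3) == 0 ? 2 : 1;
   unsigned y_shift = (local_size_y & 3) == 0 ? 2 : 1;
   unsigned tile_id = (id_y >> y_shift) * (local_size_x >> x_shift) +
                      (id_x >> x_shift);
   return tile_id >> (subgroup_id_shift - (x_shift + y_shift));
}

int main(void)
{
   /* An 8x8 workgroup is exactly one subgroup of 64: every invocation,
    * including (0,0) and (7,7), maps to subgroup 0. */
   printf("%u\n", quad_tiled_subgroup_id(0, 0, 8, 8, 6));     /* 0 */
   printf("%u\n", quad_tiled_subgroup_id(7, 7, 8, 8, 6));     /* 0 */
   /* A 16x16 workgroup has four subgroups of four tiles each; the last
    * 4x4 tile, containing invocation (15,15), belongs to subgroup 3. */
   printf("%u\n", quad_tiled_subgroup_id(15, 15, 16, 16, 6)); /* 3 */
   return 0;
}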
@@ -771,6 +813,9 @@ ir3_nir_post_finalize(struct ir3_shader *shader)
       bool progress = false;
       NIR_PASS(progress, s, ir3_nir_lower_subgroup_id_cs, shader);
 
+      if (s->info.derivative_group == DERIVATIVE_GROUP_LINEAR)
+         shader->cs.force_linear_dispatch = true;
+
       /* ir3_nir_lower_subgroup_id_cs creates extra compute intrinsics which
        * we need to lower again.
        */
@@ -148,6 +148,7 @@ get_device_extensions(const struct tu_physical_device *device,
       .KHR_bind_memory2 = true,
       .KHR_buffer_device_address = true,
       .KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
+      .KHR_compute_shader_derivatives = device->info->chip >= 7,
       .KHR_copy_commands2 = true,
       .KHR_create_renderpass2 = true,
       .KHR_dedicated_allocation = true,
@@ -308,6 +309,7 @@ get_device_extensions(const struct tu_physical_device *device,
       .GOOGLE_hlsl_functionality1 = true,
       .GOOGLE_user_type = true,
       .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
+      .NV_compute_shader_derivatives = device->info->chip >= 7,
       .VALVE_mutable_descriptor_type = true,
    } };
 
@@ -455,6 +457,10 @@ tu_get_features(struct tu_physical_device *pdevice,
    features->shaderIntegerDotProduct = true;
    features->maintenance4 = true;
 
+   /* VK_KHR_compute_shader_derivatives */
+   features->computeDerivativeGroupQuads = pdevice->info->chip >= 7;
+   features->computeDerivativeGroupLinear = pdevice->info->chip >= 7;
+
    /* VK_KHR_index_type_uint8 */
    features->indexTypeUint8 = true;
 
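A minimal sketch (again illustrative, not part of this change) of how an application would query these feature bits before relying on them; on turnip both bits come back true only on a7xx+:

#include <stdbool.h>
#include <vulkan/vulkan.h>

/* Sketch only: chain the KHR features struct into vkGetPhysicalDeviceFeatures2
 * and read back what the driver exposes. */
static bool
supports_quad_derivatives(VkPhysicalDevice physical_device)
{
   VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR derivative_features = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_KHR,
   };
   VkPhysicalDeviceFeatures2 features2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
      .pNext = &derivative_features,
   };
   vkGetPhysicalDeviceFeatures2(physical_device, &features2);
   return derivative_features.computeDerivativeGroupQuads == VK_TRUE;
}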
@@ -1023,6 +1029,9 @@ tu_get_properties(struct tu_physical_device *pdevice,
    tu_get_physical_device_properties_1_2(pdevice, props);
    tu_get_physical_device_properties_1_3(pdevice, props);
 
+   /* VK_KHR_compute_shader_derivatives */
+   props->meshAndTaskShaderDerivatives = false;
+
    /* VK_KHR_push_descriptor */
    props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
 