tu: Implement VK_KHR_shader_clock

The kernel defines a special address from which the ALWAYSON
counter can be read.

Blob uses this sequence to read it:
  getone #l15
  mov.s32s32 r2.y, -4096
  mov.s32s32 r2.z, 131071
  (rpt5)nop
  ldg.u32 r2.w, g[r2.y], 1
  ldg.u32 r2.y, g[r2.y+4], 1
  (sy)(ss)mov.s32s32 r48.x, (last)r2.w
  mov.s32s32 r48.y, (last)r2.y
  l15:

Passes:
 dEQP-VK.glsl.shader_clock.*

Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29860>
This commit is contained in:
Danylo Piliaiev 2024-06-21 19:34:44 +02:00 committed by Marge Bot
parent 4b1b4ee10c
commit 0e9854a894
3 changed files with 51 additions and 1 deletions

View file

@ -559,7 +559,7 @@ Khronos extensions that are not part of any Vulkan version:
VK_KHR_ray_tracing_maintenance1 DONE (anv/gfx12.5+, radv/gfx10.3+, tu/a740+, vn)
VK_KHR_ray_tracing_pipeline DONE (anv/gfx12.5+, lvp, radv/gfx10.3+, vn)
VK_KHR_ray_tracing_position_fetch DONE (anv, radv/gfx10.3+, vn)
VK_KHR_shader_clock DONE (anv, hasvk, lvp, nvk, radv, vn)
VK_KHR_shader_clock DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
VK_KHR_shader_maximal_reconvergence DONE (anv, lvp, nvk, panvk/v10+, radv, vn)
VK_KHR_shader_relaxed_extended_instruction DONE (anv, hasvk, nvk, panvk, radv, tu, v3dv, vn)
VK_KHR_shader_subgroup_uniform_control_flow DONE (anv, hasvk, nvk, panvk/v10+, radv, tu, vn)

View file

@ -552,6 +552,47 @@ ir3_nir_lower_array_sampler(nir_shader *shader)
nir_metadata_control_flow, NULL);
}
/*
 * Per-intrinsic callback that rewrites nir_intrinsic_shader_clock.
 *
 * The intrinsic is replaced by a two-component 32-bit vector (lo, hi) built
 * from two global loads relative to the address passed in through data.
 *
 * b     builder used to emit the replacement code before the intrinsic
 * instr intrinsic currently being visited
 * data  points to a uint64_t holding the address to read from
 *
 * Returns true if instr was a shader_clock intrinsic and was replaced,
 * false otherwise.
 */
static bool
lower_shader_clock(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
{
   if (instr->intrinsic != nir_intrinsic_shader_clock)
      return false;

   const uint64_t trap_base = *(uint64_t *)data;

   b->cursor = nir_before_instr(&instr->instr);

   /* Only the invocation picked by nir_elect issues the loads; the other
    * side of the if contributes an undef, and the resulting phi is fed to
    * nir_read_first_invocation below.
    */
   nir_def *is_elected = nir_elect(b, 1);
   nir_push_if(b, is_elected);

   /* ALWAYSON counter is mapped to this address. */
   nir_def *addr64 = nir_imm_int64(b, trap_base);
   nir_def *addr = nir_unpack_64_2x32(b, addr64);

   /* Reading _LO first presumably latches _HI making the read atomic. */
   nir_def *lo_offset = nir_imm_int(b, 0);
   nir_def *lo = nir_load_global_ir3(b, 1, 32, addr, lo_offset);
   nir_def *hi_offset = nir_imm_int(b, 1);
   nir_def *hi = nir_load_global_ir3(b, 1, 32, addr, hi_offset);
   nir_def *loaded = nir_vec2(b, lo, hi);

   nir_push_else(b, NULL);
   nir_def *unused = nir_undef(b, 2, 32);
   nir_pop_if(b, NULL);

   nir_def *merged = nir_if_phi(b, loaded, unused);
   nir_def *result = nir_read_first_invocation(b, merged);

   nir_def_replace(&instr->def, result);
   return true;
}
/*
 * Runs lower_shader_clock over the intrinsics of shader.
 *
 * uche_trap_base is handed to the callback by address through the pass's
 * opaque data pointer. nir_metadata_none is passed as the metadata argument.
 *
 * Returns whatever nir_shader_intrinsics_pass reports.
 */
static bool
ir3_nir_lower_shader_clock(nir_shader *shader, uint64_t uche_trap_base)
{
   void *cb_data = &uche_trap_base;
   bool progress = nir_shader_intrinsics_pass(shader, lower_shader_clock,
                                              nir_metadata_none, cb_data);

   return progress;
}
void
ir3_finalize_nir(struct ir3_compiler *compiler,
const struct ir3_shader_nir_options *options,
@ -594,6 +635,10 @@ ir3_finalize_nir(struct ir3_compiler *compiler,
if (compiler->array_index_add_half)
OPT(s, ir3_nir_lower_array_sampler);
if (compiler->gen >= 6) {
OPT(s, ir3_nir_lower_shader_clock, compiler->options.uche_trap_base);
}
OPT(s, nir_lower_is_helper_invocation);
ir3_optimize_loop(compiler, options, s);

View file

@ -223,6 +223,7 @@ get_device_extensions(const struct tu_physical_device *device,
.KHR_sampler_ycbcr_conversion = true,
.KHR_separate_depth_stencil_layouts = true,
.KHR_shader_atomic_int64 = device->info->a7xx.has_64b_ssbo_atomics,
.KHR_shader_clock = true,
.KHR_shader_draw_parameters = true,
.KHR_shader_expect_assume = true,
.KHR_shader_float16_int8 = true,
@ -539,6 +540,10 @@ tu_get_features(struct tu_physical_device *pdevice,
/* VK_KHR_present_wait */
features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;
/* VK_KHR_shader_clock */
features->shaderSubgroupClock = true;
features->shaderDeviceClock = true;
/* VK_KHR_shader_expect_assume */
features->shaderExpectAssume = true;