tu: Implement VK_KHR_shader_clock

The kernel defines a special address from which the ALWAYSON counter can be read. The blob uses this sequence to read it: getone #l15 mov.s32s32 r2.y, -4096 mov.s32s32 r2.z, 131071 (rpt5)nop ldg.u32 r2.w, g[r2.y], 1 ldg.u32 r2.y, g[r2.y+4], 1 (sy)(ss)mov.s32s32 r48.x, (last)r2.w mov.s32s32 r48.y, (last)r2.y l15: Passes: dEQP-VK.glsl.shader_clock.* Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/29860>
2026-01-07 00:00:12 +01:00 · 2024-06-21 19:34:44 +02:00 · 2024-06-21 19:34:44 +02:00 · 0e9854a894
commit 0e9854a894
parent 4b1b4ee10c
3 changed files with 51 additions and 1 deletions
--- a/docs/features.txt
+++ b/docs/features.txt
@ -559,7 +559,7 @@ Khronos extensions that are not part of any Vulkan version:
  VK_KHR_ray_tracing_maintenance1                       DONE (anv/gfx12.5+, radv/gfx10.3+, tu/a740+, vn)
  VK_KHR_ray_tracing_pipeline                           DONE (anv/gfx12.5+, lvp, radv/gfx10.3+, vn)
  VK_KHR_ray_tracing_position_fetch                     DONE (anv, radv/gfx10.3+, vn)
-  VK_KHR_shader_clock                                   DONE (anv, hasvk, lvp, nvk, radv, vn)
+  VK_KHR_shader_clock                                   DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
  VK_KHR_shader_maximal_reconvergence                   DONE (anv, lvp, nvk, panvk/v10+, radv, vn)
  VK_KHR_shader_relaxed_extended_instruction            DONE (anv, hasvk, nvk, panvk, radv, tu, v3dv, vn)
  VK_KHR_shader_subgroup_uniform_control_flow           DONE (anv, hasvk, nvk, panvk/v10+, radv, tu, vn)
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@ -552,6 +552,47 @@ ir3_nir_lower_array_sampler(nir_shader *shader)
      nir_metadata_control_flow, NULL);
 }

+/* Per-intrinsic callback that replaces nir_intrinsic_shader_clock with two
+ * 32-bit global loads from the address handed in through `data`, combined
+ * into a 2x32-bit value.
+ *
+ * b:     builder used to emit the replacement code just before `instr`.
+ * instr: the intrinsic currently being visited.
+ * data:  pointer to a uint64_t holding the base address to read from.
+ *
+ * Returns true when `instr` was a shader_clock intrinsic and its result was
+ * replaced, false for every other intrinsic (which is left untouched).
+ */
+static bool
+lower_shader_clock(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
+{
+   if (instr->intrinsic != nir_intrinsic_shader_clock)
+      return false;
+
+   uint64_t uche_trap_base = *(uint64_t *)data;
+
+   b->cursor = nir_before_instr(&instr->instr);
+   nir_def *clock, *undef;
+
+   /* The loads are emitted only inside the branch guarded by nir_elect(),
+    * not unconditionally for every invocation.
+    */
+   nir_push_if(b, nir_elect(b, 1));
+   {
+      /* ALWAYSON counter is mapped to this address. */
+      nir_def *base_addr =
+         nir_unpack_64_2x32(b, nir_imm_int64(b, uche_trap_base));
+      /* Reading _LO first presumably latches _HI making the read atomic. */
+      /* NOTE(review): the offsets 0 and 1 presumably count dwords, not
+       * bytes; confirm against nir_load_global_ir3.
+       */
+      nir_def *clock_lo =
+         nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 0));
+      nir_def *clock_hi =
+         nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 1));
+      clock = nir_vec2(b, clock_lo, clock_hi);
+   }
+   nir_push_else(b, NULL);
+   {
+      /* The phi below needs a value for this path too; it is undefined. */
+      undef = nir_undef(b, 2, 32);
+   }
+   nir_pop_if(b, NULL);
+
+   /* Merge the two branches with a phi and take the first invocation's
+    * value, presumably so that every invocation observes the counter value
+    * read in the elected branch rather than the undef.
+    */
+   clock = nir_read_first_invocation(b, nir_if_phi(b, clock, undef));
+   nir_def_replace(&instr->def, clock);
+   return true;
+}
+
+/* Runs lower_shader_clock over the intrinsics of `shader`.
+ *
+ * uche_trap_base is forwarded to the callback by address; the pointer refers
+ * to this function's own parameter, so it is only valid for the duration of
+ * the nir_shader_intrinsics_pass() call.
+ *
+ * Returns whatever nir_shader_intrinsics_pass() reports.
+ */
+static bool
+ir3_nir_lower_shader_clock(nir_shader *shader, uint64_t uche_trap_base)
+{
+   /* No metadata is declared as preserved; presumably because the callback
+    * inserts new control flow (an if/else) around the loads.
+    */
+   return nir_shader_intrinsics_pass(shader, lower_shader_clock,
+                                     nir_metadata_none, &uche_trap_base);
+}
+
 void
 ir3_finalize_nir(struct ir3_compiler *compiler,
                 const struct ir3_shader_nir_options *options,
@ -594,6 +635,10 @@ ir3_finalize_nir(struct ir3_compiler *compiler,
   if (compiler->array_index_add_half)
      OPT(s, ir3_nir_lower_array_sampler);

+   if (compiler->gen >= 6) {
+      OPT(s, ir3_nir_lower_shader_clock, compiler->options.uche_trap_base);
+   }
+
   OPT(s, nir_lower_is_helper_invocation);

   ir3_optimize_loop(compiler, options, s);
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@ -223,6 +223,7 @@ get_device_extensions(const struct tu_physical_device *device,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = device->info->a7xx.has_64b_ssbo_atomics,
+      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
@ -539,6 +540,10 @@ tu_get_features(struct tu_physical_device *pdevice,
   /* VK_KHR_present_wait */
   features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;

+   /* VK_KHR_shader_clock */
+   features->shaderSubgroupClock = true;
+   features->shaderDeviceClock = true;
+
   /* VK_KHR_shader_expect_assume */
   features->shaderExpectAssume = true;