From 0e9854a8940542cfa48774a2bf3cd954ae96cdd7 Mon Sep 17 00:00:00 2001
From: Danylo Piliaiev
Date: Fri, 21 Jun 2024 19:34:44 +0200
Subject: [PATCH] tu: Implement VK_KHR_shader_clock

There is a special address defined in kernel from which
ALWAYSON counter could be read.

Blob uses this sequence to read it:
  getone #l15
  mov.s32s32 r2.y, -4096
  mov.s32s32 r2.z, 131071
  (rpt5)nop
  ldg.u32 r2.w, g[r2.y], 1
  ldg.u32 r2.y, g[r2.y+4], 1
  (sy)(ss)mov.s32s32 r48.x, (last)r2.w
  mov.s32s32 r48.y, (last)r2.y
  l15:

Passes: dEQP-VK.glsl.shader_clock.*

Signed-off-by: Danylo Piliaiev
Part-of:
---
/*
 * Reviewer notes (not part of the commit message):
 *
 * - lower_shader_clock() ignores every intrinsic except
 *   nir_intrinsic_shader_clock. For a match it takes uche_trap_base from the
 *   pass's void *data, emits two 32-bit nir_load_global_ir3 loads (offset 0
 *   first, then offset 1) inside an if guarded by nir_elect(), and uses
 *   nir_undef on the else side. The nir_if_phi result is passed through
 *   nir_read_first_invocation() and replaces the intrinsic's def.
 * - The offsets are presumably in dwords, since the blob sequence reads
 *   g[r2.y] and g[r2.y+4] -- TODO confirm against nir_load_global_ir3.
 * - ir3_nir_lower_shader_clock() runs the callback through
 *   nir_shader_intrinsics_pass() and declares nir_metadata_none preserved.
 * - ir3_finalize_nir() only runs the pass when compiler->gen >= 6, while
 *   turnip sets KHR_shader_clock, shaderSubgroupClock and shaderDeviceClock
 *   to true unconditionally.
 * - NOTE(review): the lowering does not look at the clock scope, so both
 *   feature bits are served by the same counter read -- confirm intended.
 */
 docs/features.txt                 |  2 +-
 src/freedreno/ir3/ir3_nir.c       | 45 +++++++++++++++++++++++++++++++
 src/freedreno/vulkan/tu_device.cc |  5 ++++
 3 files changed, 51 insertions(+), 1 deletion(-)

diff --git a/docs/features.txt b/docs/features.txt
index 796a09a2ca7..d29d556d274 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -559,7 +559,7 @@ Khronos extensions that are not part of any Vulkan version:
   VK_KHR_ray_tracing_maintenance1                       DONE (anv/gfx12.5+, radv/gfx10.3+, tu/a740+, vn)
   VK_KHR_ray_tracing_pipeline                           DONE (anv/gfx12.5+, lvp, radv/gfx10.3+, vn)
   VK_KHR_ray_tracing_position_fetch                     DONE (anv, radv/gfx10.3+, vn)
-  VK_KHR_shader_clock                                   DONE (anv, hasvk, lvp, nvk, radv, vn)
+  VK_KHR_shader_clock                                   DONE (anv, hasvk, lvp, nvk, radv, tu, vn)
   VK_KHR_shader_maximal_reconvergence                   DONE (anv, lvp, nvk, panvk/v10+, radv, vn)
   VK_KHR_shader_relaxed_extended_instruction            DONE (anv, hasvk, nvk, panvk, radv, tu, v3dv, vn)
   VK_KHR_shader_subgroup_uniform_control_flow           DONE (anv, hasvk, nvk, panvk/v10+, radv, tu, vn)
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 587335084e5..c3c72d2c63f 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -552,6 +552,47 @@ ir3_nir_lower_array_sampler(nir_shader *shader)
       nir_metadata_control_flow, NULL);
 }
 
+static bool
+lower_shader_clock(struct nir_builder *b, nir_intrinsic_instr *instr, void *data)
+{
+   if (instr->intrinsic != nir_intrinsic_shader_clock)
+      return false;
+
+   uint64_t uche_trap_base = *(uint64_t *)data;
+
+   b->cursor = nir_before_instr(&instr->instr);
+   nir_def *clock, *undef;
+
+   nir_push_if(b, nir_elect(b, 1));
+   {
+      /* ALWAYSON counter is mapped to this address. */
+      nir_def *base_addr =
+         nir_unpack_64_2x32(b, nir_imm_int64(b, uche_trap_base));
+      /* Reading _LO first presumably latches _HI making the read atomic. */
+      nir_def *clock_lo =
+         nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 0));
+      nir_def *clock_hi =
+         nir_load_global_ir3(b, 1, 32, base_addr, nir_imm_int(b, 1));
+      clock = nir_vec2(b, clock_lo, clock_hi);
+   }
+   nir_push_else(b, NULL);
+   {
+      undef = nir_undef(b, 2, 32);
+   }
+   nir_pop_if(b, NULL);
+
+   clock = nir_read_first_invocation(b, nir_if_phi(b, clock, undef));
+   nir_def_replace(&instr->def, clock);
+   return true;
+}
+
+static bool
+ir3_nir_lower_shader_clock(nir_shader *shader, uint64_t uche_trap_base)
+{
+   return nir_shader_intrinsics_pass(shader, lower_shader_clock,
+                                     nir_metadata_none, &uche_trap_base);
+}
+
 void
 ir3_finalize_nir(struct ir3_compiler *compiler,
                  const struct ir3_shader_nir_options *options,
@@ -594,6 +635,10 @@ ir3_finalize_nir(struct ir3_compiler *compiler,
    if (compiler->array_index_add_half)
       OPT(s, ir3_nir_lower_array_sampler);
 
+   if (compiler->gen >= 6) {
+      OPT(s, ir3_nir_lower_shader_clock, compiler->options.uche_trap_base);
+   }
+
    OPT(s, nir_lower_is_helper_invocation);
 
    ir3_optimize_loop(compiler, options, s);
diff --git a/src/freedreno/vulkan/tu_device.cc b/src/freedreno/vulkan/tu_device.cc
index 4507bef02e9..299cf1c92f6 100644
--- a/src/freedreno/vulkan/tu_device.cc
+++ b/src/freedreno/vulkan/tu_device.cc
@@ -223,6 +223,7 @@ get_device_extensions(const struct tu_physical_device *device,
       .KHR_sampler_ycbcr_conversion = true,
       .KHR_separate_depth_stencil_layouts = true,
       .KHR_shader_atomic_int64 = device->info->a7xx.has_64b_ssbo_atomics,
+      .KHR_shader_clock = true,
       .KHR_shader_draw_parameters = true,
       .KHR_shader_expect_assume = true,
       .KHR_shader_float16_int8 = true,
@@ -539,6 +540,10 @@ tu_get_features(struct tu_physical_device *pdevice,
    /* VK_KHR_present_wait */
    features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;
 
+   /* VK_KHR_shader_clock */
+   features->shaderSubgroupClock = true;
+   features->shaderDeviceClock = true;
+
    /* VK_KHR_shader_expect_assume */
    features->shaderExpectAssume = true;