pan/bi: Enable ARB_shader_clock extension support

Introduces the LD_GCLK instruction to read the system timestamp or the cycle
counter. Currently only the cycle counter is supported.

Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/34342>
This commit is contained in:
Ashley Smith 2025-04-02 11:34:05 +01:00 committed by Marge Bot
parent f0c0997277
commit c1ce2dcc66
6 changed files with 43 additions and 3 deletions

View file

@ -307,7 +307,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_ARB_sample_locations DONE (freedreno/a6xx, nvc0, zink)
GL_ARB_seamless_cubemap_per_texture DONE (etnaviv/SEAMLESS_CUBE_MAP, freedreno, nvc0, r600, radeonsi, softpipe, virgl, zink, asahi, iris, crocus)
GL_ARB_shader_ballot DONE (nvc0, radeonsi, zink, iris, crocus/gen8, d3d12, asahi)
GL_ARB_shader_clock DONE (freedreno/a6xx, nv50, nvc0, r600, radeonsi, llvmpipe, virgl, zink, iris, crocus/gen7+)
GL_ARB_shader_clock DONE (freedreno/a6xx, nv50, nvc0, r600, radeonsi, llvmpipe, virgl, panfrost, zink, iris, crocus/gen7+)
GL_ARB_shader_stencil_export DONE (r600, radeonsi, softpipe, llvmpipe, virgl, panfrost, zink, asahi, iris/gen9+)
GL_ARB_shader_viewport_layer_array DONE (freedreno/a6xx, nvc0, radeonsi, zink, iris, crocus/gen6+, asahi)
GL_ARB_shading_language_include DONE

View file

@ -28,4 +28,5 @@ KHR_partial_update on etnaviv
VK_KHR_line_rasterization on panvk
shaderImageGatherExtended on panvk
textureCompressionBC on panvk
VK_EXT_sample_locations on RADV for GFX10+
VK_EXT_sample_locations on RADV for GFX10+
GL_ARB_shader_clock on panfrost

View file

@ -514,7 +514,7 @@ panfrost_init_screen_caps(struct panfrost_screen *screen)
* work to turn on, since CYCLE_COUNT_START needs to be issued. In
* kbase, userspace requests this via BASE_JD_REQ_PERMON. There is not
* yet way to request this with mainline TODO */
caps->shader_clock = false;
caps->shader_clock = dev->arch >= 6;
caps->vs_instanceid = true;
caps->texture_multisample = true;

View file

@ -2187,6 +2187,7 @@ bi_emit_intrinsic(bi_builder *b, nir_intrinsic_instr *instr)
break;
case nir_intrinsic_shader_clock:
assert(nir_intrinsic_memory_scope(instr) == SCOPE_SUBGROUP);
bi_ld_gclk_u64_to(b, dst, BI_SOURCE_CYCLE_COUNTER);
bi_split_def(b, &instr->def);
break;

View file

@ -733,6 +733,20 @@
<value desc="Retrieve previous state">retrieve</value>
</enum>
<!--
  Clock selector enum for LD_GCLK: six reserved entries followed by the two
  named sources. va_pack_gclk returns VA_SOURCE_CYCLE_COUNTER and
  VA_SOURCE_SYSTEM_TIMESTAMP, which are presumably generated from the two
  values below (TODO confirm against the enum generator).
-->
<enum name="Source">
<desc>
Clock source for LD_GCLK instruction.
</desc>
<reserved/>
<reserved/>
<reserved/>
<reserved/>
<reserved/>
<reserved/>
<value desc="Cycle counter">cycle_counter</value>
<value desc="System timestamp">system_timestamp</value>
</enum>
<enum name="Source format">
<desc>
In-memory format of varyings.
@ -1018,6 +1032,13 @@
<src>Index and table</src>
</ins>
<!--
  Global clock load. The entry declares one written sr element with an
  sr_count of 2, plus a slot.
  NOTE(review): the entry carries unused="true", yet va_pack_instr has a
  BI_OPCODE_LD_GCLK_U64 case that calls va_pack_gclk. Confirm the attribute
  is intended for an instruction the compiler now emits.
-->
<ins name="LD_GCLK_U64" title="Global clock load" opcode="0x44" unused="true" unit="LS">
<desc>Load the 64-bit global clock, either a cycle counter or the system clock.</desc>
<sr write="true"/>
<sr_count count="2"/>
<slot/>
</ins>
<ins name="LD_TEX_IMM" title="Load immediate texture" opcode="0x66" opcode2="1" message="attribute" unit="LS">
<desc>
Load `vecsize` components from the texture descriptor at entry `index`

View file

@ -677,6 +677,19 @@ va_pack_byte_offset_8(const bi_instr *I)
return ((uint64_t)offset) << 8;
}
/*
 * Pick the encoding for the clock selector of an LD_GCLK_U64 instruction.
 *
 * Reads I->source and returns the matching VA_SOURCE_* value; the caller ORs
 * the result into the instruction word. Any other selector is handed to
 * invalid_instruction() with the cause string "source".
 */
static uint64_t
va_pack_gclk(const bi_instr *I)
{
   if (I->source == BI_SOURCE_CYCLE_COUNTER)
      return VA_SOURCE_CYCLE_COUNTER;

   if (I->source == BI_SOURCE_SYSTEM_TIMESTAMP)
      return VA_SOURCE_SYSTEM_TIMESTAMP;

   /* Neither known clock source matched. */
   invalid_instruction(I, "source");
}
static uint64_t
va_pack_load(const bi_instr *I, bool buffer_descriptor)
{
@ -920,6 +933,10 @@ va_pack_instr(const bi_instr *I, unsigned arch)
break;
}
case BI_OPCODE_LD_GCLK_U64:
hex |= va_pack_gclk(I);
break;
case BI_OPCODE_TEX_GRADIENT:
case BI_OPCODE_TEX_SINGLE:
case BI_OPCODE_TEX_FETCH: